gcc/config/rs6000/rs6000.c
1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "memmodel.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "cfgloop.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "ira.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "print-tree.h"
48 #include "varasm.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "output.h"
52 #include "dbxout.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "sched-int.h"
57 #include "gimplify.h"
58 #include "gimple-iterator.h"
59 #include "gimple-walk.h"
60 #include "intl.h"
61 #include "params.h"
62 #include "tm-constrs.h"
63 #include "tree-vectorizer.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "context.h"
67 #include "tree-pass.h"
68 #if TARGET_XCOFF
69 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
70 #endif
71 #if TARGET_MACHO
72 #include "gstab.h" /* for N_SLINE */
73 #endif
74 #include "case-cfn-macros.h"
75 #include "ppc-auxv.h"
77 /* This file should be included last. */
78 #include "target-def.h"
80 #ifndef TARGET_NO_PROTOTYPE
81 #define TARGET_NO_PROTOTYPE 0
82 #endif
84 #define min(A,B) ((A) < (B) ? (A) : (B))
85 #define max(A,B) ((A) > (B) ? (A) : (B))
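/* Note that, as plain macros, min and max evaluate an argument twice, so
   arguments with side effects are unsafe.  A small illustration:

       int x = 1, y = 5;
       int m = min (x++, y);

   expands to ((x++) < (y) ? (x++) : (y)); here x is incremented twice and
   m ends up 2, not 1.  */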
87 /* Structure used to define the rs6000 stack */
88 typedef struct rs6000_stack {
89 int reload_completed; /* stack info won't change from here on */
90 int first_gp_reg_save; /* first callee saved GP register used */
91 int first_fp_reg_save; /* first callee saved FP register used */
92 int first_altivec_reg_save; /* first callee saved AltiVec register used */
93 int lr_save_p; /* true if the link reg needs to be saved */
94 int cr_save_p; /* true if the CR reg needs to be saved */
95 unsigned int vrsave_mask; /* mask of vec registers to save */
96 int push_p; /* true if we need to allocate stack space */
97 int calls_p; /* true if the function makes any calls */
98 int world_save_p; /* true if we're saving *everything*:
99 r13-r31, cr, f14-f31, vrsave, v20-v31 */
100 enum rs6000_abi abi; /* which ABI to use */
101 int gp_save_offset; /* offset to save GP regs from initial SP */
102 int fp_save_offset; /* offset to save FP regs from initial SP */
103 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
104 int lr_save_offset; /* offset to save LR from initial SP */
105 int cr_save_offset; /* offset to save CR from initial SP */
106 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
107 int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
108 int varargs_save_offset; /* offset to save the varargs registers */
109 int ehrd_offset; /* offset to EH return data */
110 int ehcr_offset; /* offset to EH CR field data */
111 int reg_size; /* register size (4 or 8) */
112 HOST_WIDE_INT vars_size; /* variable save area size */
113 int parm_size; /* outgoing parameter size */
114 int save_size; /* save area size */
115 int fixed_size; /* fixed size of stack frame */
116 int gp_size; /* size of saved GP registers */
117 int fp_size; /* size of saved FP registers */
118 int altivec_size; /* size of saved AltiVec registers */
119 int cr_size; /* size to hold CR if not in fixed area */
120 int vrsave_size; /* size to hold VRSAVE */
121 int altivec_padding_size; /* size of altivec alignment padding */
122 int spe_gp_size; /* size of 64-bit GPR save size for SPE */
123 int spe_padding_size;
124 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
125 int spe_64bit_regs_used;
126 int savres_strategy;
127 } rs6000_stack_t;
129 /* A C structure for machine-specific, per-function data.
130 This is added to the cfun structure. */
131 typedef struct GTY(()) machine_function
132 {
133 /* Whether the instruction chain has been scanned already. */
134 int spe_insn_chain_scanned_p;
135 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
136 int ra_needs_full_frame;
137 /* Flags if __builtin_return_address (0) was used. */
138 int ra_need_lr;
139 /* Cache lr_save_p after expansion of builtin_eh_return. */
140 int lr_save_state;
141 /* Whether we need to save the TOC to the reserved stack location in the
142 function prologue. */
143 bool save_toc_in_prologue;
144 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
145 varargs save area. */
146 HOST_WIDE_INT varargs_save_offset;
147 /* Temporary stack slot to use for SDmode copies. This slot is
148 64-bits wide and is allocated early enough so that the offset
149 does not overflow the 16-bit load/store offset field. */
150 rtx sdmode_stack_slot;
151 /* Alternative internal arg pointer for -fsplit-stack. */
152 rtx split_stack_arg_pointer;
153 bool split_stack_argp_used;
154 /* Flag if r2 setup is needed with ELFv2 ABI. */
155 bool r2_setup_needed;
156 /* The components already handled by separate shrink-wrapping, which should
157 not be considered by the prologue and epilogue. */
158 bool gpr_is_wrapped_separately[32];
159 bool lr_is_wrapped_separately;
160 } machine_function;
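/* A minimal sketch of how this per-function data is reached: GCC hangs it
   off the current function as cfun->machine, allocated lazily by
   rs6000_init_machine_status (declared later in this file).  For example:

       if (cfun->machine->ra_need_lr)
         ...   // __builtin_return_address (0) was used, LR must be saved
*/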
162 /* Support targetm.vectorize.builtin_mask_for_load. */
163 static GTY(()) tree altivec_builtin_mask_for_load;
165 /* Set to nonzero once AIX common-mode calls have been defined. */
166 static GTY(()) int common_mode_defined;
168 /* Label number of the label created for -mrelocatable, which we call so we
169 can get the address of the GOT section. */
170 static int rs6000_pic_labelno;
172 #ifdef USING_ELFOS_H
173 /* Counter for labels which are to be placed in .fixup. */
174 int fixuplabelno = 0;
175 #endif
177 /* Whether to use a variant of the AIX ABI for PowerPC64 Linux. */
178 int dot_symbols;
180 /* Specify the machine mode that pointers have. After generation of rtl, the
181 compiler makes no further distinction between pointers and any other objects
182 of this machine mode. The type is unsigned since not all things that
183 include rs6000.h also include machmode.h. */
184 unsigned rs6000_pmode;
186 /* Width in bits of a pointer. */
187 unsigned rs6000_pointer_size;
189 #ifdef HAVE_AS_GNU_ATTRIBUTE
190 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
191 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
192 # endif
193 /* Flag whether floating point values have been passed/returned.
194 Note that this doesn't say whether fprs are used, since the
195 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
196 should be set for soft-float values passed in gprs and ieee128
197 values passed in vsx registers. */
198 static bool rs6000_passes_float;
199 static bool rs6000_passes_long_double;
200 /* Flag whether vector values have been passed/returned. */
201 static bool rs6000_passes_vector;
202 /* Flag whether small (<= 8 byte) structures have been returned. */
203 static bool rs6000_returns_struct;
204 #endif
206 /* Value is TRUE if register/mode pair is acceptable. */
207 bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
209 /* Maximum number of registers needed for a given register class and mode. */
210 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
212 /* How many registers are needed for a given register and mode. */
213 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
215 /* Map register number to register class. */
216 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
218 static int dbg_cost_ctrl;
220 /* Built in types. */
221 tree rs6000_builtin_types[RS6000_BTI_MAX];
222 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
224 /* Flag to say the TOC is initialized */
225 int toc_initialized, need_toc_init;
226 char toc_label_name[10];
228 /* Cached value of rs6000_variable_issue. This is cached in
229 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
230 static short cached_can_issue_more;
232 static GTY(()) section *read_only_data_section;
233 static GTY(()) section *private_data_section;
234 static GTY(()) section *tls_data_section;
235 static GTY(()) section *tls_private_data_section;
236 static GTY(()) section *read_only_private_data_section;
237 static GTY(()) section *sdata2_section;
238 static GTY(()) section *toc_section;
240 struct builtin_description
241 {
242 const HOST_WIDE_INT mask;
243 const enum insn_code icode;
244 const char *const name;
245 const enum rs6000_builtins code;
246 };
248 /* Describe the vector unit used for modes. */
249 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
250 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
252 /* Register classes for various constraints that are based on the target
253 switches. */
254 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
256 /* Describe the alignment of a vector. */
257 int rs6000_vector_align[NUM_MACHINE_MODES];
259 /* Map selected modes to types for builtins. */
260 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
262 /* What modes to automatically generate reciprocal divide estimate (fre) and
263 reciprocal sqrt (frsqrte) for. */
264 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
266 /* Masks to determine which reciprocal estimate instructions to generate
267 automatically. */
268 enum rs6000_recip_mask {
269 RECIP_SF_DIV = 0x001, /* Use divide estimate */
270 RECIP_DF_DIV = 0x002,
271 RECIP_V4SF_DIV = 0x004,
272 RECIP_V2DF_DIV = 0x008,
274 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
275 RECIP_DF_RSQRT = 0x020,
276 RECIP_V4SF_RSQRT = 0x040,
277 RECIP_V2DF_RSQRT = 0x080,
279 /* Various combinations of flags for -mrecip=xxx. */
280 RECIP_NONE = 0,
281 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
282 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
283 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
285 RECIP_HIGH_PRECISION = RECIP_ALL,
287 /* On low precision machines like the power5, don't enable double precision
288 reciprocal square root estimate, since it isn't accurate enough. */
289 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
290 };
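/* Worked example of the masks above: RECIP_ALL is 0xff, and
   RECIP_LOW_PRECISION clears RECIP_DF_RSQRT (0x020) and RECIP_V2DF_RSQRT
   (0x080), leaving 0xff & ~0xa0 == 0x5f: all four divide estimates plus
   the two single-precision rsqrt estimates.  */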
292 /* -mrecip options. */
293 static struct
294 {
295 const char *string; /* option name */
296 unsigned int mask; /* mask bits to set */
297 } recip_options[] = {
298 { "all", RECIP_ALL },
299 { "none", RECIP_NONE },
300 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
301 | RECIP_V2DF_DIV) },
302 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
303 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
304 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
305 | RECIP_V2DF_RSQRT) },
306 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
307 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
310 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
311 static const struct
312 {
313 const char *cpu;
314 unsigned int cpuid;
315 } cpu_is_info[] = {
316 { "power9", PPC_PLATFORM_POWER9 },
317 { "power8", PPC_PLATFORM_POWER8 },
318 { "power7", PPC_PLATFORM_POWER7 },
319 { "power6x", PPC_PLATFORM_POWER6X },
320 { "power6", PPC_PLATFORM_POWER6 },
321 { "power5+", PPC_PLATFORM_POWER5_PLUS },
322 { "power5", PPC_PLATFORM_POWER5 },
323 { "ppc970", PPC_PLATFORM_PPC970 },
324 { "power4", PPC_PLATFORM_POWER4 },
325 { "ppca2", PPC_PLATFORM_PPCA2 },
326 { "ppc476", PPC_PLATFORM_PPC476 },
327 { "ppc464", PPC_PLATFORM_PPC464 },
328 { "ppc440", PPC_PLATFORM_PPC440 },
329 { "ppc405", PPC_PLATFORM_PPC405 },
330 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
333 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
334 static const struct
335 {
336 const char *hwcap;
337 int mask;
338 unsigned int id;
339 } cpu_supports_info[] = {
340 /* AT_HWCAP masks. */
341 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
342 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
343 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
344 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
345 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
346 { "booke", PPC_FEATURE_BOOKE, 0 },
347 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
348 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
349 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
350 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
351 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
352 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
353 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
354 { "notb", PPC_FEATURE_NO_TB, 0 },
355 { "pa6t", PPC_FEATURE_PA6T, 0 },
356 { "power4", PPC_FEATURE_POWER4, 0 },
357 { "power5", PPC_FEATURE_POWER5, 0 },
358 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
359 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
360 { "ppc32", PPC_FEATURE_32, 0 },
361 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
362 { "ppc64", PPC_FEATURE_64, 0 },
363 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
364 { "smt", PPC_FEATURE_SMT, 0 },
365 { "spe", PPC_FEATURE_HAS_SPE, 0 },
366 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
367 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
368 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
370 /* AT_HWCAP2 masks. */
371 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
372 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
373 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
374 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
375 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
376 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
377 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
378 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
379 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
380 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }
383 /* Newer LIBCs explicitly export this symbol to declare that they provide
384 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
385 reference to this symbol whenever we expand a CPU builtin, so that
386 we never link against an old LIBC. */
387 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
389 /* True if we have expanded a CPU builtin. */
390 bool cpu_builtin_p;
392 /* Pointer to function (in rs6000-c.c) that can define or undefine target
393 macros that have changed. Languages that don't support the preprocessor
394 don't link in rs6000-c.c, so we can't call it directly. */
395 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
397 /* Simplify register classes into simpler classifications. We assume
398 GPR_REG_TYPE through FPR_REG_TYPE are ordered so that we can use a simple range
399 check for standard register classes (gpr/floating/altivec/vsx) and
400 floating/vector classes (float/altivec/vsx). */
402 enum rs6000_reg_type {
403 NO_REG_TYPE,
404 PSEUDO_REG_TYPE,
405 GPR_REG_TYPE,
406 VSX_REG_TYPE,
407 ALTIVEC_REG_TYPE,
408 FPR_REG_TYPE,
409 SPR_REG_TYPE,
410 CR_REG_TYPE,
411 SPE_ACC_TYPE,
412 SPEFSCR_REG_TYPE
413 };
415 /* Map register class to register type. */
416 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
418 /* First/last register type for the 'normal' register types (i.e. general
419 purpose, floating point, altivec, and VSX registers). */
420 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
422 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
425 /* Register classes we care about in secondary reload or when checking for a
426 legitimate address. We only need to worry about GPR, FPR, and Altivec
427 registers here, along with an ANY field that is the OR of the 3 register classes. */
429 enum rs6000_reload_reg_type {
430 RELOAD_REG_GPR, /* General purpose registers. */
431 RELOAD_REG_FPR, /* Traditional floating point regs. */
432 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
433 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
434 N_RELOAD_REG
435 };
437 /* For setting up register classes, loop through the 3 register classes mapping
438 into real registers, and skip the ANY class, which is just an OR of the
439 bits. */
440 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
441 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
443 /* Map reload register type to a register in the register class. */
444 struct reload_reg_map_type {
445 const char *name; /* Register class name. */
446 int reg; /* Register in the register class. */
447 };
449 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
450 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
451 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
452 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
453 { "Any", -1 }, /* RELOAD_REG_ANY. */
456 /* Mask bits for each register class, indexed per mode. Historically the
457 compiler has been more restrictive about which types can do PRE_MODIFY
458 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
459 typedef unsigned char addr_mask_type;
461 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
462 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
463 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
464 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
465 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
466 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
467 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
468 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
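/* Decoding example: an addr_mask of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d
   says the mode is valid in that register class and accepts reg+reg and
   reg+offset addresses, but none of the auto-increment forms.  */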
470 /* Reload insns and masks of valid addressing modes, per register type. */
471 struct rs6000_reg_addr {
472 enum insn_code reload_load; /* INSN to reload for loading. */
473 enum insn_code reload_store; /* INSN to reload for storing. */
474 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
475 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
476 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
477 enum insn_code fusion_gpr_ld; /* INSN for fusing gpr ADDIS/loads. */
478 /* INSNs for fusing addi with loads
479 or stores for each reg. class. */
480 enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
481 enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
482 /* INSNs for fusing addis with loads
483 or stores for each reg. class. */
484 enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
485 enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
486 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
487 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
488 bool fused_toc; /* Mode supports TOC fusion. */
489 };
491 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
493 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
494 static inline bool
495 mode_supports_pre_incdec_p (machine_mode mode)
496 {
497 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
498 != 0);
499 }
501 /* Helper function to say whether a mode supports PRE_MODIFY. */
502 static inline bool
503 mode_supports_pre_modify_p (machine_mode mode)
504 {
505 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
506 != 0);
507 }
509 /* Return true if we have D-form addressing in altivec registers. */
510 static inline bool
511 mode_supports_vmx_dform (machine_mode mode)
512 {
513 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
514 }
516 /* Return true if we have D-form addressing in VSX registers. This addressing
517 is more limited than normal d-form addressing in that the offset must be
518 aligned on a 16-byte boundary. */
519 static inline bool
520 mode_supports_vsx_dform_quad (machine_mode mode)
521 {
522 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
523 != 0);
524 }
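/* Usage sketch for the helpers above, as used by the legitimate-address
   checks later in this file (DFmode chosen only for illustration):

       if (mode_supports_pre_incdec_p (DFmode))
         ...   // a (pre_inc (reg)) address may be accepted for DFmode
*/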
527 /* Target cpu costs. */
529 struct processor_costs {
530 const int mulsi; /* cost of SImode multiplication. */
531 const int mulsi_const; /* cost of SImode multiplication by constant. */
532 const int mulsi_const9; /* cost of SImode mult by short constant. */
533 const int muldi; /* cost of DImode multiplication. */
534 const int divsi; /* cost of SImode division. */
535 const int divdi; /* cost of DImode division. */
536 const int fp; /* cost of simple SFmode and DFmode insns. */
537 const int dmul; /* cost of DFmode multiplication (and fmadd). */
538 const int sdiv; /* cost of SFmode division (fdivs). */
539 const int ddiv; /* cost of DFmode division (fdiv). */
540 const int cache_line_size; /* cache line size in bytes. */
541 const int l1_cache_size; /* size of l1 cache, in kilobytes. */
542 const int l2_cache_size; /* size of l2 cache, in kilobytes. */
543 const int simultaneous_prefetches; /* number of parallel prefetch
544 operations. */
545 const int sfdf_convert; /* cost of SF->DF conversion. */
546 };
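/* The COSTS_N_INSNS unit used to fill these tables is defined in rtl.h as
   ((N) * 4), i.e. entries are quarter-insn units relative to a one-insn
   add, so a divsi entry of COSTS_N_INSNS (19) reads as "roughly 19 adds".  */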
548 const struct processor_costs *rs6000_cost;
550 /* Processor costs (relative to an add) */
552 /* Instruction size costs on 32-bit processors. */
553 static const
554 struct processor_costs size32_cost = {
555 COSTS_N_INSNS (1), /* mulsi */
556 COSTS_N_INSNS (1), /* mulsi_const */
557 COSTS_N_INSNS (1), /* mulsi_const9 */
558 COSTS_N_INSNS (1), /* muldi */
559 COSTS_N_INSNS (1), /* divsi */
560 COSTS_N_INSNS (1), /* divdi */
561 COSTS_N_INSNS (1), /* fp */
562 COSTS_N_INSNS (1), /* dmul */
563 COSTS_N_INSNS (1), /* sdiv */
564 COSTS_N_INSNS (1), /* ddiv */
565 32, /* cache line size */
566 0, /* l1 cache */
567 0, /* l2 cache */
568 0, /* streams */
569 0, /* SF->DF convert */
570 };
572 /* Instruction size costs on 64-bit processors. */
573 static const
574 struct processor_costs size64_cost = {
575 COSTS_N_INSNS (1), /* mulsi */
576 COSTS_N_INSNS (1), /* mulsi_const */
577 COSTS_N_INSNS (1), /* mulsi_const9 */
578 COSTS_N_INSNS (1), /* muldi */
579 COSTS_N_INSNS (1), /* divsi */
580 COSTS_N_INSNS (1), /* divdi */
581 COSTS_N_INSNS (1), /* fp */
582 COSTS_N_INSNS (1), /* dmul */
583 COSTS_N_INSNS (1), /* sdiv */
584 COSTS_N_INSNS (1), /* ddiv */
585 128, /* cache line size */
586 0, /* l1 cache */
587 0, /* l2 cache */
588 0, /* streams */
589 0, /* SF->DF convert */
590 };
592 /* Instruction costs on RS64A processors. */
593 static const
594 struct processor_costs rs64a_cost = {
595 COSTS_N_INSNS (20), /* mulsi */
596 COSTS_N_INSNS (12), /* mulsi_const */
597 COSTS_N_INSNS (8), /* mulsi_const9 */
598 COSTS_N_INSNS (34), /* muldi */
599 COSTS_N_INSNS (65), /* divsi */
600 COSTS_N_INSNS (67), /* divdi */
601 COSTS_N_INSNS (4), /* fp */
602 COSTS_N_INSNS (4), /* dmul */
603 COSTS_N_INSNS (31), /* sdiv */
604 COSTS_N_INSNS (31), /* ddiv */
605 128, /* cache line size */
606 128, /* l1 cache */
607 2048, /* l2 cache */
608 1, /* streams */
609 0, /* SF->DF convert */
610 };
612 /* Instruction costs on MPCCORE processors. */
613 static const
614 struct processor_costs mpccore_cost = {
615 COSTS_N_INSNS (2), /* mulsi */
616 COSTS_N_INSNS (2), /* mulsi_const */
617 COSTS_N_INSNS (2), /* mulsi_const9 */
618 COSTS_N_INSNS (2), /* muldi */
619 COSTS_N_INSNS (6), /* divsi */
620 COSTS_N_INSNS (6), /* divdi */
621 COSTS_N_INSNS (4), /* fp */
622 COSTS_N_INSNS (5), /* dmul */
623 COSTS_N_INSNS (10), /* sdiv */
624 COSTS_N_INSNS (17), /* ddiv */
625 32, /* cache line size */
626 4, /* l1 cache */
627 16, /* l2 cache */
628 1, /* streams */
629 0, /* SF->DF convert */
630 };
632 /* Instruction costs on PPC403 processors. */
633 static const
634 struct processor_costs ppc403_cost = {
635 COSTS_N_INSNS (4), /* mulsi */
636 COSTS_N_INSNS (4), /* mulsi_const */
637 COSTS_N_INSNS (4), /* mulsi_const9 */
638 COSTS_N_INSNS (4), /* muldi */
639 COSTS_N_INSNS (33), /* divsi */
640 COSTS_N_INSNS (33), /* divdi */
641 COSTS_N_INSNS (11), /* fp */
642 COSTS_N_INSNS (11), /* dmul */
643 COSTS_N_INSNS (11), /* sdiv */
644 COSTS_N_INSNS (11), /* ddiv */
645 32, /* cache line size */
646 4, /* l1 cache */
647 16, /* l2 cache */
648 1, /* streams */
649 0, /* SF->DF convert */
650 };
652 /* Instruction costs on PPC405 processors. */
653 static const
654 struct processor_costs ppc405_cost = {
655 COSTS_N_INSNS (5), /* mulsi */
656 COSTS_N_INSNS (4), /* mulsi_const */
657 COSTS_N_INSNS (3), /* mulsi_const9 */
658 COSTS_N_INSNS (5), /* muldi */
659 COSTS_N_INSNS (35), /* divsi */
660 COSTS_N_INSNS (35), /* divdi */
661 COSTS_N_INSNS (11), /* fp */
662 COSTS_N_INSNS (11), /* dmul */
663 COSTS_N_INSNS (11), /* sdiv */
664 COSTS_N_INSNS (11), /* ddiv */
665 32, /* cache line size */
666 16, /* l1 cache */
667 128, /* l2 cache */
668 1, /* streams */
669 0, /* SF->DF convert */
670 };
672 /* Instruction costs on PPC440 processors. */
673 static const
674 struct processor_costs ppc440_cost = {
675 COSTS_N_INSNS (3), /* mulsi */
676 COSTS_N_INSNS (2), /* mulsi_const */
677 COSTS_N_INSNS (2), /* mulsi_const9 */
678 COSTS_N_INSNS (3), /* muldi */
679 COSTS_N_INSNS (34), /* divsi */
680 COSTS_N_INSNS (34), /* divdi */
681 COSTS_N_INSNS (5), /* fp */
682 COSTS_N_INSNS (5), /* dmul */
683 COSTS_N_INSNS (19), /* sdiv */
684 COSTS_N_INSNS (33), /* ddiv */
685 32, /* cache line size */
686 32, /* l1 cache */
687 256, /* l2 cache */
688 1, /* streams */
689 0, /* SF->DF convert */
690 };
692 /* Instruction costs on PPC476 processors. */
693 static const
694 struct processor_costs ppc476_cost = {
695 COSTS_N_INSNS (4), /* mulsi */
696 COSTS_N_INSNS (4), /* mulsi_const */
697 COSTS_N_INSNS (4), /* mulsi_const9 */
698 COSTS_N_INSNS (4), /* muldi */
699 COSTS_N_INSNS (11), /* divsi */
700 COSTS_N_INSNS (11), /* divdi */
701 COSTS_N_INSNS (6), /* fp */
702 COSTS_N_INSNS (6), /* dmul */
703 COSTS_N_INSNS (19), /* sdiv */
704 COSTS_N_INSNS (33), /* ddiv */
705 32, /* l1 cache line size */
706 32, /* l1 cache */
707 512, /* l2 cache */
708 1, /* streams */
709 0, /* SF->DF convert */
710 };
712 /* Instruction costs on PPC601 processors. */
713 static const
714 struct processor_costs ppc601_cost = {
715 COSTS_N_INSNS (5), /* mulsi */
716 COSTS_N_INSNS (5), /* mulsi_const */
717 COSTS_N_INSNS (5), /* mulsi_const9 */
718 COSTS_N_INSNS (5), /* muldi */
719 COSTS_N_INSNS (36), /* divsi */
720 COSTS_N_INSNS (36), /* divdi */
721 COSTS_N_INSNS (4), /* fp */
722 COSTS_N_INSNS (5), /* dmul */
723 COSTS_N_INSNS (17), /* sdiv */
724 COSTS_N_INSNS (31), /* ddiv */
725 32, /* cache line size */
726 32, /* l1 cache */
727 256, /* l2 cache */
728 1, /* streams */
729 0, /* SF->DF convert */
730 };
732 /* Instruction costs on PPC603 processors. */
733 static const
734 struct processor_costs ppc603_cost = {
735 COSTS_N_INSNS (5), /* mulsi */
736 COSTS_N_INSNS (3), /* mulsi_const */
737 COSTS_N_INSNS (2), /* mulsi_const9 */
738 COSTS_N_INSNS (5), /* muldi */
739 COSTS_N_INSNS (37), /* divsi */
740 COSTS_N_INSNS (37), /* divdi */
741 COSTS_N_INSNS (3), /* fp */
742 COSTS_N_INSNS (4), /* dmul */
743 COSTS_N_INSNS (18), /* sdiv */
744 COSTS_N_INSNS (33), /* ddiv */
745 32, /* cache line size */
746 8, /* l1 cache */
747 64, /* l2 cache */
748 1, /* streams */
749 0, /* SF->DF convert */
750 };
752 /* Instruction costs on PPC604 processors. */
753 static const
754 struct processor_costs ppc604_cost = {
755 COSTS_N_INSNS (4), /* mulsi */
756 COSTS_N_INSNS (4), /* mulsi_const */
757 COSTS_N_INSNS (4), /* mulsi_const9 */
758 COSTS_N_INSNS (4), /* muldi */
759 COSTS_N_INSNS (20), /* divsi */
760 COSTS_N_INSNS (20), /* divdi */
761 COSTS_N_INSNS (3), /* fp */
762 COSTS_N_INSNS (3), /* dmul */
763 COSTS_N_INSNS (18), /* sdiv */
764 COSTS_N_INSNS (32), /* ddiv */
765 32, /* cache line size */
766 16, /* l1 cache */
767 512, /* l2 cache */
768 1, /* streams */
769 0, /* SF->DF convert */
770 };
772 /* Instruction costs on PPC604e processors. */
773 static const
774 struct processor_costs ppc604e_cost = {
775 COSTS_N_INSNS (2), /* mulsi */
776 COSTS_N_INSNS (2), /* mulsi_const */
777 COSTS_N_INSNS (2), /* mulsi_const9 */
778 COSTS_N_INSNS (2), /* muldi */
779 COSTS_N_INSNS (20), /* divsi */
780 COSTS_N_INSNS (20), /* divdi */
781 COSTS_N_INSNS (3), /* fp */
782 COSTS_N_INSNS (3), /* dmul */
783 COSTS_N_INSNS (18), /* sdiv */
784 COSTS_N_INSNS (32), /* ddiv */
785 32, /* cache line size */
786 32, /* l1 cache */
787 1024, /* l2 cache */
788 1, /* streams */
789 0, /* SF->DF convert */
790 };
792 /* Instruction costs on PPC620 processors. */
793 static const
794 struct processor_costs ppc620_cost = {
795 COSTS_N_INSNS (5), /* mulsi */
796 COSTS_N_INSNS (4), /* mulsi_const */
797 COSTS_N_INSNS (3), /* mulsi_const9 */
798 COSTS_N_INSNS (7), /* muldi */
799 COSTS_N_INSNS (21), /* divsi */
800 COSTS_N_INSNS (37), /* divdi */
801 COSTS_N_INSNS (3), /* fp */
802 COSTS_N_INSNS (3), /* dmul */
803 COSTS_N_INSNS (18), /* sdiv */
804 COSTS_N_INSNS (32), /* ddiv */
805 128, /* cache line size */
806 32, /* l1 cache */
807 1024, /* l2 cache */
808 1, /* streams */
809 0, /* SF->DF convert */
810 };
812 /* Instruction costs on PPC630 processors. */
813 static const
814 struct processor_costs ppc630_cost = {
815 COSTS_N_INSNS (5), /* mulsi */
816 COSTS_N_INSNS (4), /* mulsi_const */
817 COSTS_N_INSNS (3), /* mulsi_const9 */
818 COSTS_N_INSNS (7), /* muldi */
819 COSTS_N_INSNS (21), /* divsi */
820 COSTS_N_INSNS (37), /* divdi */
821 COSTS_N_INSNS (3), /* fp */
822 COSTS_N_INSNS (3), /* dmul */
823 COSTS_N_INSNS (17), /* sdiv */
824 COSTS_N_INSNS (21), /* ddiv */
825 128, /* cache line size */
826 64, /* l1 cache */
827 1024, /* l2 cache */
828 1, /* streams */
829 0, /* SF->DF convert */
830 };
832 /* Instruction costs on Cell processor. */
833 /* COSTS_N_INSNS (1) ~ one add. */
834 static const
835 struct processor_costs ppccell_cost = {
836 COSTS_N_INSNS (9/2)+2, /* mulsi */
837 COSTS_N_INSNS (6/2), /* mulsi_const */
838 COSTS_N_INSNS (6/2), /* mulsi_const9 */
839 COSTS_N_INSNS (15/2)+2, /* muldi */
840 COSTS_N_INSNS (38/2), /* divsi */
841 COSTS_N_INSNS (70/2), /* divdi */
842 COSTS_N_INSNS (10/2), /* fp */
843 COSTS_N_INSNS (10/2), /* dmul */
844 COSTS_N_INSNS (74/2), /* sdiv */
845 COSTS_N_INSNS (74/2), /* ddiv */
846 128, /* cache line size */
847 32, /* l1 cache */
848 512, /* l2 cache */
849 6, /* streams */
850 0, /* SF->DF convert */
851 };
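/* Note the Cell entries above rely on C integer division before scaling:
   COSTS_N_INSNS (9/2) + 2 is COSTS_N_INSNS (4) + 2 == 18, since 9/2
   truncates to 4 and COSTS_N_INSNS (N) is (N) * 4.  The halved values are
   raw latencies divided by two, with the +2 terms presumably adding back
   latency lost in the halving.  */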
853 /* Instruction costs on PPC750 and PPC7400 processors. */
854 static const
855 struct processor_costs ppc750_cost = {
856 COSTS_N_INSNS (5), /* mulsi */
857 COSTS_N_INSNS (3), /* mulsi_const */
858 COSTS_N_INSNS (2), /* mulsi_const9 */
859 COSTS_N_INSNS (5), /* muldi */
860 COSTS_N_INSNS (17), /* divsi */
861 COSTS_N_INSNS (17), /* divdi */
862 COSTS_N_INSNS (3), /* fp */
863 COSTS_N_INSNS (3), /* dmul */
864 COSTS_N_INSNS (17), /* sdiv */
865 COSTS_N_INSNS (31), /* ddiv */
866 32, /* cache line size */
867 32, /* l1 cache */
868 512, /* l2 cache */
869 1, /* streams */
870 0, /* SF->DF convert */
871 };
873 /* Instruction costs on PPC7450 processors. */
874 static const
875 struct processor_costs ppc7450_cost = {
876 COSTS_N_INSNS (4), /* mulsi */
877 COSTS_N_INSNS (3), /* mulsi_const */
878 COSTS_N_INSNS (3), /* mulsi_const9 */
879 COSTS_N_INSNS (4), /* muldi */
880 COSTS_N_INSNS (23), /* divsi */
881 COSTS_N_INSNS (23), /* divdi */
882 COSTS_N_INSNS (5), /* fp */
883 COSTS_N_INSNS (5), /* dmul */
884 COSTS_N_INSNS (21), /* sdiv */
885 COSTS_N_INSNS (35), /* ddiv */
886 32, /* cache line size */
887 32, /* l1 cache */
888 1024, /* l2 cache */
889 1, /* streams */
890 0, /* SF->DF convert */
893 /* Instruction costs on PPC8540 processors. */
894 static const
895 struct processor_costs ppc8540_cost = {
896 COSTS_N_INSNS (4), /* mulsi */
897 COSTS_N_INSNS (4), /* mulsi_const */
898 COSTS_N_INSNS (4), /* mulsi_const9 */
899 COSTS_N_INSNS (4), /* muldi */
900 COSTS_N_INSNS (19), /* divsi */
901 COSTS_N_INSNS (19), /* divdi */
902 COSTS_N_INSNS (4), /* fp */
903 COSTS_N_INSNS (4), /* dmul */
904 COSTS_N_INSNS (29), /* sdiv */
905 COSTS_N_INSNS (29), /* ddiv */
906 32, /* cache line size */
907 32, /* l1 cache */
908 256, /* l2 cache */
909 1, /* prefetch streams */
910 0, /* SF->DF convert */
911 };
913 /* Instruction costs on E300C2 and E300C3 cores. */
914 static const
915 struct processor_costs ppce300c2c3_cost = {
916 COSTS_N_INSNS (4), /* mulsi */
917 COSTS_N_INSNS (4), /* mulsi_const */
918 COSTS_N_INSNS (4), /* mulsi_const9 */
919 COSTS_N_INSNS (4), /* muldi */
920 COSTS_N_INSNS (19), /* divsi */
921 COSTS_N_INSNS (19), /* divdi */
922 COSTS_N_INSNS (3), /* fp */
923 COSTS_N_INSNS (4), /* dmul */
924 COSTS_N_INSNS (18), /* sdiv */
925 COSTS_N_INSNS (33), /* ddiv */
926 32, /* cache line size */
927 16, /* l1 cache */
928 16, /* l2 cache */
929 1, /* prefetch streams */
930 0, /* SF->DF convert */
931 };
933 /* Instruction costs on PPCE500MC processors. */
934 static const
935 struct processor_costs ppce500mc_cost = {
936 COSTS_N_INSNS (4), /* mulsi */
937 COSTS_N_INSNS (4), /* mulsi_const */
938 COSTS_N_INSNS (4), /* mulsi_const9 */
939 COSTS_N_INSNS (4), /* muldi */
940 COSTS_N_INSNS (14), /* divsi */
941 COSTS_N_INSNS (14), /* divdi */
942 COSTS_N_INSNS (8), /* fp */
943 COSTS_N_INSNS (10), /* dmul */
944 COSTS_N_INSNS (36), /* sdiv */
945 COSTS_N_INSNS (66), /* ddiv */
946 64, /* cache line size */
947 32, /* l1 cache */
948 128, /* l2 cache */
949 1, /* prefetch streams */
950 0, /* SF->DF convert */
951 };
953 /* Instruction costs on PPCE500MC64 processors. */
954 static const
955 struct processor_costs ppce500mc64_cost = {
956 COSTS_N_INSNS (4), /* mulsi */
957 COSTS_N_INSNS (4), /* mulsi_const */
958 COSTS_N_INSNS (4), /* mulsi_const9 */
959 COSTS_N_INSNS (4), /* muldi */
960 COSTS_N_INSNS (14), /* divsi */
961 COSTS_N_INSNS (14), /* divdi */
962 COSTS_N_INSNS (4), /* fp */
963 COSTS_N_INSNS (10), /* dmul */
964 COSTS_N_INSNS (36), /* sdiv */
965 COSTS_N_INSNS (66), /* ddiv */
966 64, /* cache line size */
967 32, /* l1 cache */
968 128, /* l2 cache */
969 1, /* prefetch streams */
970 0, /* SF->DF convert */
971 };
973 /* Instruction costs on PPCE5500 processors. */
974 static const
975 struct processor_costs ppce5500_cost = {
976 COSTS_N_INSNS (5), /* mulsi */
977 COSTS_N_INSNS (5), /* mulsi_const */
978 COSTS_N_INSNS (4), /* mulsi_const9 */
979 COSTS_N_INSNS (5), /* muldi */
980 COSTS_N_INSNS (14), /* divsi */
981 COSTS_N_INSNS (14), /* divdi */
982 COSTS_N_INSNS (7), /* fp */
983 COSTS_N_INSNS (10), /* dmul */
984 COSTS_N_INSNS (36), /* sdiv */
985 COSTS_N_INSNS (66), /* ddiv */
986 64, /* cache line size */
987 32, /* l1 cache */
988 128, /* l2 cache */
989 1, /* prefetch streams */
990 0, /* SF->DF convert */
991 };
993 /* Instruction costs on PPCE6500 processors. */
994 static const
995 struct processor_costs ppce6500_cost = {
996 COSTS_N_INSNS (5), /* mulsi */
997 COSTS_N_INSNS (5), /* mulsi_const */
998 COSTS_N_INSNS (4), /* mulsi_const9 */
999 COSTS_N_INSNS (5), /* muldi */
1000 COSTS_N_INSNS (14), /* divsi */
1001 COSTS_N_INSNS (14), /* divdi */
1002 COSTS_N_INSNS (7), /* fp */
1003 COSTS_N_INSNS (10), /* dmul */
1004 COSTS_N_INSNS (36), /* sdiv */
1005 COSTS_N_INSNS (66), /* ddiv */
1006 64, /* cache line size */
1007 32, /* l1 cache */
1008 128, /* l2 cache */
1009 1, /* prefetch streams */
1010 0, /* SF->DF convert */
1011 };
1013 /* Instruction costs on AppliedMicro Titan processors. */
1014 static const
1015 struct processor_costs titan_cost = {
1016 COSTS_N_INSNS (5), /* mulsi */
1017 COSTS_N_INSNS (5), /* mulsi_const */
1018 COSTS_N_INSNS (5), /* mulsi_const9 */
1019 COSTS_N_INSNS (5), /* muldi */
1020 COSTS_N_INSNS (18), /* divsi */
1021 COSTS_N_INSNS (18), /* divdi */
1022 COSTS_N_INSNS (10), /* fp */
1023 COSTS_N_INSNS (10), /* dmul */
1024 COSTS_N_INSNS (46), /* sdiv */
1025 COSTS_N_INSNS (72), /* ddiv */
1026 32, /* cache line size */
1027 32, /* l1 cache */
1028 512, /* l2 cache */
1029 1, /* prefetch streams */
1030 0, /* SF->DF convert */
1031 };
1033 /* Instruction costs on POWER4 and POWER5 processors. */
1034 static const
1035 struct processor_costs power4_cost = {
1036 COSTS_N_INSNS (3), /* mulsi */
1037 COSTS_N_INSNS (2), /* mulsi_const */
1038 COSTS_N_INSNS (2), /* mulsi_const9 */
1039 COSTS_N_INSNS (4), /* muldi */
1040 COSTS_N_INSNS (18), /* divsi */
1041 COSTS_N_INSNS (34), /* divdi */
1042 COSTS_N_INSNS (3), /* fp */
1043 COSTS_N_INSNS (3), /* dmul */
1044 COSTS_N_INSNS (17), /* sdiv */
1045 COSTS_N_INSNS (17), /* ddiv */
1046 128, /* cache line size */
1047 32, /* l1 cache */
1048 1024, /* l2 cache */
1049 8, /* prefetch streams */
1050 0, /* SF->DF convert */
1051 };
1053 /* Instruction costs on POWER6 processors. */
1054 static const
1055 struct processor_costs power6_cost = {
1056 COSTS_N_INSNS (8), /* mulsi */
1057 COSTS_N_INSNS (8), /* mulsi_const */
1058 COSTS_N_INSNS (8), /* mulsi_const9 */
1059 COSTS_N_INSNS (8), /* muldi */
1060 COSTS_N_INSNS (22), /* divsi */
1061 COSTS_N_INSNS (28), /* divdi */
1062 COSTS_N_INSNS (3), /* fp */
1063 COSTS_N_INSNS (3), /* dmul */
1064 COSTS_N_INSNS (13), /* sdiv */
1065 COSTS_N_INSNS (16), /* ddiv */
1066 128, /* cache line size */
1067 64, /* l1 cache */
1068 2048, /* l2 cache */
1069 16, /* prefetch streams */
1070 0, /* SF->DF convert */
1071 };
1073 /* Instruction costs on POWER7 processors. */
1074 static const
1075 struct processor_costs power7_cost = {
1076 COSTS_N_INSNS (2), /* mulsi */
1077 COSTS_N_INSNS (2), /* mulsi_const */
1078 COSTS_N_INSNS (2), /* mulsi_const9 */
1079 COSTS_N_INSNS (2), /* muldi */
1080 COSTS_N_INSNS (18), /* divsi */
1081 COSTS_N_INSNS (34), /* divdi */
1082 COSTS_N_INSNS (3), /* fp */
1083 COSTS_N_INSNS (3), /* dmul */
1084 COSTS_N_INSNS (13), /* sdiv */
1085 COSTS_N_INSNS (16), /* ddiv */
1086 128, /* cache line size */
1087 32, /* l1 cache */
1088 256, /* l2 cache */
1089 12, /* prefetch streams */
1090 COSTS_N_INSNS (3), /* SF->DF convert */
1091 };
1093 /* Instruction costs on POWER8 processors. */
1094 static const
1095 struct processor_costs power8_cost = {
1096 COSTS_N_INSNS (3), /* mulsi */
1097 COSTS_N_INSNS (3), /* mulsi_const */
1098 COSTS_N_INSNS (3), /* mulsi_const9 */
1099 COSTS_N_INSNS (3), /* muldi */
1100 COSTS_N_INSNS (19), /* divsi */
1101 COSTS_N_INSNS (35), /* divdi */
1102 COSTS_N_INSNS (3), /* fp */
1103 COSTS_N_INSNS (3), /* dmul */
1104 COSTS_N_INSNS (14), /* sdiv */
1105 COSTS_N_INSNS (17), /* ddiv */
1106 128, /* cache line size */
1107 32, /* l1 cache */
1108 256, /* l2 cache */
1109 12, /* prefetch streams */
1110 COSTS_N_INSNS (3), /* SF->DF convert */
1111 };
1113 /* Instruction costs on POWER9 processors. */
1114 static const
1115 struct processor_costs power9_cost = {
1116 COSTS_N_INSNS (3), /* mulsi */
1117 COSTS_N_INSNS (3), /* mulsi_const */
1118 COSTS_N_INSNS (3), /* mulsi_const9 */
1119 COSTS_N_INSNS (3), /* muldi */
1120 COSTS_N_INSNS (8), /* divsi */
1121 COSTS_N_INSNS (12), /* divdi */
1122 COSTS_N_INSNS (3), /* fp */
1123 COSTS_N_INSNS (3), /* dmul */
1124 COSTS_N_INSNS (13), /* sdiv */
1125 COSTS_N_INSNS (18), /* ddiv */
1126 128, /* cache line size */
1127 32, /* l1 cache */
1128 512, /* l2 cache */
1129 8, /* prefetch streams */
1130 COSTS_N_INSNS (3), /* SF->DF convert */
1131 };
1133 /* Instruction costs on POWER A2 processors. */
1134 static const
1135 struct processor_costs ppca2_cost = {
1136 COSTS_N_INSNS (16), /* mulsi */
1137 COSTS_N_INSNS (16), /* mulsi_const */
1138 COSTS_N_INSNS (16), /* mulsi_const9 */
1139 COSTS_N_INSNS (16), /* muldi */
1140 COSTS_N_INSNS (22), /* divsi */
1141 COSTS_N_INSNS (28), /* divdi */
1142 COSTS_N_INSNS (3), /* fp */
1143 COSTS_N_INSNS (3), /* dmul */
1144 COSTS_N_INSNS (59), /* sdiv */
1145 COSTS_N_INSNS (72), /* ddiv */
1146 64, /* cache line size */
1147 16, /* l1 cache */
1148 2048, /* l2 cache */
1149 16, /* prefetch streams */
1150 0, /* SF->DF convert */
1151 };
1154 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1155 #undef RS6000_BUILTIN_0
1156 #undef RS6000_BUILTIN_1
1157 #undef RS6000_BUILTIN_2
1158 #undef RS6000_BUILTIN_3
1159 #undef RS6000_BUILTIN_A
1160 #undef RS6000_BUILTIN_D
1161 #undef RS6000_BUILTIN_E
1162 #undef RS6000_BUILTIN_H
1163 #undef RS6000_BUILTIN_P
1164 #undef RS6000_BUILTIN_Q
1165 #undef RS6000_BUILTIN_S
1166 #undef RS6000_BUILTIN_X
1168 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1169 { NAME, ICODE, MASK, ATTR },
1171 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1172 { NAME, ICODE, MASK, ATTR },
1174 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1175 { NAME, ICODE, MASK, ATTR },
1177 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1178 { NAME, ICODE, MASK, ATTR },
1180 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1181 { NAME, ICODE, MASK, ATTR },
1183 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1184 { NAME, ICODE, MASK, ATTR },
1186 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
1187 { NAME, ICODE, MASK, ATTR },
1189 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1190 { NAME, ICODE, MASK, ATTR },
1192 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1193 { NAME, ICODE, MASK, ATTR },
1195 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1196 { NAME, ICODE, MASK, ATTR },
1198 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
1199 { NAME, ICODE, MASK, ATTR },
1201 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1202 { NAME, ICODE, MASK, ATTR },
1204 struct rs6000_builtin_info_type {
1205 const char *name;
1206 const enum insn_code icode;
1207 const HOST_WIDE_INT mask;
1208 const unsigned attr;
1209 };
1211 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1212 {
1213 #include "rs6000-builtin.def"
1214 };
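/* Expansion example for the X-macro table above, using a hypothetical
   rs6000-builtin.def entry (field order per the macros: the ENUM is
   dropped and ICODE moves ahead of MASK):

       RS6000_BUILTIN_2 (FOO_BUILTIN, "__builtin_foo",
                         RS6000_BTM_ALTIVEC, RS6000_BTC_CONST, CODE_FOR_nothing)

   contributes the initializer

       { "__builtin_foo", CODE_FOR_nothing, RS6000_BTM_ALTIVEC, RS6000_BTC_CONST },
*/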
1216 #undef RS6000_BUILTIN_0
1217 #undef RS6000_BUILTIN_1
1218 #undef RS6000_BUILTIN_2
1219 #undef RS6000_BUILTIN_3
1220 #undef RS6000_BUILTIN_A
1221 #undef RS6000_BUILTIN_D
1222 #undef RS6000_BUILTIN_E
1223 #undef RS6000_BUILTIN_H
1224 #undef RS6000_BUILTIN_P
1225 #undef RS6000_BUILTIN_Q
1226 #undef RS6000_BUILTIN_S
1227 #undef RS6000_BUILTIN_X
1229 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1230 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1233 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1234 static bool spe_func_has_64bit_regs_p (void);
1235 static struct machine_function * rs6000_init_machine_status (void);
1236 static int rs6000_ra_ever_killed (void);
1237 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1238 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1239 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1240 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1241 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1242 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1243 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1244 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1245 bool);
1246 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1247 unsigned int);
1248 static bool is_microcoded_insn (rtx_insn *);
1249 static bool is_nonpipeline_insn (rtx_insn *);
1250 static bool is_cracked_insn (rtx_insn *);
1251 static bool is_load_insn (rtx, rtx *);
1252 static bool is_store_insn (rtx, rtx *);
1253 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1254 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1255 static bool insn_must_be_first_in_group (rtx_insn *);
1256 static bool insn_must_be_last_in_group (rtx_insn *);
1257 static void altivec_init_builtins (void);
1258 static tree builtin_function_type (machine_mode, machine_mode,
1259 machine_mode, machine_mode,
1260 enum rs6000_builtins, const char *name);
1261 static void rs6000_common_init_builtins (void);
1262 static void paired_init_builtins (void);
1263 static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
1264 static void spe_init_builtins (void);
1265 static void htm_init_builtins (void);
1266 static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
1267 static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
1268 static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
1269 static rs6000_stack_t *rs6000_stack_info (void);
1270 static void is_altivec_return_reg (rtx, void *);
1271 int easy_vector_constant (rtx, machine_mode);
1272 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1273 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1274 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1275 bool, bool);
1276 #if TARGET_MACHO
1277 static void macho_branch_islands (void);
1278 #endif
1279 static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
1280 int, int *);
1281 static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
1282 int, int, int *);
1283 static bool rs6000_mode_dependent_address (const_rtx);
1284 static bool rs6000_debug_mode_dependent_address (const_rtx);
1285 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1286 machine_mode, rtx);
1287 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1288 machine_mode,
1289 rtx);
1290 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1291 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1292 enum reg_class);
1293 static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
1294 machine_mode);
1295 static bool rs6000_debug_secondary_memory_needed (enum reg_class,
1296 enum reg_class,
1297 machine_mode);
1298 static bool rs6000_cannot_change_mode_class (machine_mode,
1299 machine_mode,
1300 enum reg_class);
1301 static bool rs6000_debug_cannot_change_mode_class (machine_mode,
1302 machine_mode,
1303 enum reg_class);
1304 static bool rs6000_save_toc_in_prologue_p (void);
1305 static rtx rs6000_internal_arg_pointer (void);
1307 rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
1308 int, int *)
1309 = rs6000_legitimize_reload_address;
1311 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1312 = rs6000_mode_dependent_address;
1314 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1315 machine_mode, rtx)
1316 = rs6000_secondary_reload_class;
1318 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1319 = rs6000_preferred_reload_class;
1321 bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
1322 machine_mode)
1323 = rs6000_secondary_memory_needed;
1325 bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
1326 machine_mode,
1327 enum reg_class)
1328 = rs6000_cannot_change_mode_class;
1330 const int INSN_NOT_AVAILABLE = -1;
1332 static void rs6000_print_isa_options (FILE *, int, const char *,
1333 HOST_WIDE_INT);
1334 static void rs6000_print_builtin_options (FILE *, int, const char *,
1335 HOST_WIDE_INT);
1337 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1338 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1339 enum rs6000_reg_type,
1340 machine_mode,
1341 secondary_reload_info *,
1342 bool);
1343 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1344 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
1345 static tree rs6000_fold_builtin (tree, int, tree *, bool);
1347 /* Hash table for keeping track of TOC entries. */
1349 struct GTY((for_user)) toc_hash_struct
1350 {
1351 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1352 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1353 rtx key;
1354 machine_mode key_mode;
1355 int labelno;
1356 };
1358 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1359 {
1360 static hashval_t hash (toc_hash_struct *);
1361 static bool equal (toc_hash_struct *, toc_hash_struct *);
1362 };
1364 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
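/* Lookup sketch, assuming the standard gcc/hash-table.h API (the actual
   hash and equal bodies appear much further down in this file):

       struct toc_hash_struct tmp = { x, mode, 0 };
       toc_hash_struct **slot = toc_hash_table->find_slot (&tmp, INSERT);
*/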
1366 /* Hash table to keep track of the argument types for builtin functions. */
1368 struct GTY((for_user)) builtin_hash_struct
1369 {
1370 tree type;
1371 machine_mode mode[4]; /* return value + 3 arguments. */
1372 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1373 };
1375 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1376 {
1377 static hashval_t hash (builtin_hash_struct *);
1378 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1379 };
1381 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1384 /* Default register names. */
1385 char rs6000_reg_names[][8] =
1386 {
1387 "0", "1", "2", "3", "4", "5", "6", "7",
1388 "8", "9", "10", "11", "12", "13", "14", "15",
1389 "16", "17", "18", "19", "20", "21", "22", "23",
1390 "24", "25", "26", "27", "28", "29", "30", "31",
1391 "0", "1", "2", "3", "4", "5", "6", "7",
1392 "8", "9", "10", "11", "12", "13", "14", "15",
1393 "16", "17", "18", "19", "20", "21", "22", "23",
1394 "24", "25", "26", "27", "28", "29", "30", "31",
1395 "mq", "lr", "ctr","ap",
1396 "0", "1", "2", "3", "4", "5", "6", "7",
1397 "ca",
1398 /* AltiVec registers. */
1399 "0", "1", "2", "3", "4", "5", "6", "7",
1400 "8", "9", "10", "11", "12", "13", "14", "15",
1401 "16", "17", "18", "19", "20", "21", "22", "23",
1402 "24", "25", "26", "27", "28", "29", "30", "31",
1403 "vrsave", "vscr",
1404 /* SPE registers. */
1405 "spe_acc", "spefscr",
1406 /* Soft frame pointer. */
1407 "sfp",
1408 /* HTM SPR registers. */
1409 "tfhar", "tfiar", "texasr",
1410 /* SPE High registers. */
1411 "0", "1", "2", "3", "4", "5", "6", "7",
1412 "8", "9", "10", "11", "12", "13", "14", "15",
1413 "16", "17", "18", "19", "20", "21", "22", "23",
1414 "24", "25", "26", "27", "28", "29", "30", "31"
1417 #ifdef TARGET_REGNAMES
1418 static const char alt_reg_names[][8] =
1419 {
1420 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1421 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1422 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1423 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1424 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1425 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1426 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1427 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1428 "mq", "lr", "ctr", "ap",
1429 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1430 "ca",
1431 /* AltiVec registers. */
1432 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1433 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1434 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1435 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1436 "vrsave", "vscr",
1437 /* SPE registers. */
1438 "spe_acc", "spefscr",
1439 /* Soft frame pointer. */
1440 "sfp",
1441 /* HTM SPR registers. */
1442 "tfhar", "tfiar", "texasr",
1443 /* SPE High registers. */
1444 "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1445 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1446 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1447 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1449 #endif
1451 /* Table of valid machine attributes. */
1453 static const struct attribute_spec rs6000_attribute_table[] =
1454 {
1455 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1456 affects_type_identity } */
1457 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute,
1458 false },
1459 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1460 false },
1461 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1462 false },
1463 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1464 false },
1465 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1466 false },
1467 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1468 SUBTARGET_ATTRIBUTE_TABLE,
1469 #endif
1470 { NULL, 0, 0, false, false, false, NULL, false }
1471 };
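/* Source-level usage of two of these attributes, as documented for the
   rs6000 ports:

       extern void far_away (void) __attribute__ ((longcall));
       typedef int v4si __attribute__ ((altivec (vector__)));
*/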
1473 #ifndef TARGET_PROFILE_KERNEL
1474 #define TARGET_PROFILE_KERNEL 0
1475 #endif
1477 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1478 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
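/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0)
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31),
   matching the architected VRSAVE bit layout.  */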
1480 /* Initialize the GCC target structure. */
1481 #undef TARGET_ATTRIBUTE_TABLE
1482 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1483 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1484 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1485 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1486 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1488 #undef TARGET_ASM_ALIGNED_DI_OP
1489 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1491 /* Default unaligned ops are only provided for ELF. Find the ops needed
1492 for non-ELF systems. */
1493 #ifndef OBJECT_FORMAT_ELF
1494 #if TARGET_XCOFF
1495 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1496 64-bit targets. */
1497 #undef TARGET_ASM_UNALIGNED_HI_OP
1498 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1499 #undef TARGET_ASM_UNALIGNED_SI_OP
1500 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1501 #undef TARGET_ASM_UNALIGNED_DI_OP
1502 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1503 #else
1504 /* For Darwin. */
1505 #undef TARGET_ASM_UNALIGNED_HI_OP
1506 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1507 #undef TARGET_ASM_UNALIGNED_SI_OP
1508 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1509 #undef TARGET_ASM_UNALIGNED_DI_OP
1510 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1511 #undef TARGET_ASM_ALIGNED_DI_OP
1512 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1513 #endif
1514 #endif
1516 /* This hook deals with fixups for relocatable code and DI-mode objects
1517 in 64-bit code. */
1518 #undef TARGET_ASM_INTEGER
1519 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1521 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1522 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1523 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1524 #endif
1526 #undef TARGET_SET_UP_BY_PROLOGUE
1527 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1529 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1530 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1531 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1532 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1533 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1534 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1535 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1536 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1537 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1538 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1539 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1540 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1542 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1543 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1545 #undef TARGET_INTERNAL_ARG_POINTER
1546 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1548 #undef TARGET_HAVE_TLS
1549 #define TARGET_HAVE_TLS HAVE_AS_TLS
1551 #undef TARGET_CANNOT_FORCE_CONST_MEM
1552 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1554 #undef TARGET_DELEGITIMIZE_ADDRESS
1555 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1557 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1558 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1560 #undef TARGET_ASM_FUNCTION_PROLOGUE
1561 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1562 #undef TARGET_ASM_FUNCTION_EPILOGUE
1563 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1565 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1566 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1568 #undef TARGET_LEGITIMIZE_ADDRESS
1569 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1571 #undef TARGET_SCHED_VARIABLE_ISSUE
1572 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1574 #undef TARGET_SCHED_ISSUE_RATE
1575 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1576 #undef TARGET_SCHED_ADJUST_COST
1577 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1578 #undef TARGET_SCHED_ADJUST_PRIORITY
1579 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1580 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1581 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1582 #undef TARGET_SCHED_INIT
1583 #define TARGET_SCHED_INIT rs6000_sched_init
1584 #undef TARGET_SCHED_FINISH
1585 #define TARGET_SCHED_FINISH rs6000_sched_finish
1586 #undef TARGET_SCHED_REORDER
1587 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1588 #undef TARGET_SCHED_REORDER2
1589 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1591 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1592 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1594 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1595 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1597 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1598 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1599 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1600 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1601 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1602 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1603 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1604 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1606 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1607 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1608 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1609 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1610 rs6000_builtin_support_vector_misalignment
1611 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1612 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1613 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1614 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1615 rs6000_builtin_vectorization_cost
1616 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1617 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1618 rs6000_preferred_simd_mode
1619 #undef TARGET_VECTORIZE_INIT_COST
1620 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1621 #undef TARGET_VECTORIZE_ADD_STMT_COST
1622 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1623 #undef TARGET_VECTORIZE_FINISH_COST
1624 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1625 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1626 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1628 #undef TARGET_INIT_BUILTINS
1629 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1630 #undef TARGET_BUILTIN_DECL
1631 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1633 #undef TARGET_FOLD_BUILTIN
1634 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1636 #undef TARGET_EXPAND_BUILTIN
1637 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1639 #undef TARGET_MANGLE_TYPE
1640 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1642 #undef TARGET_INIT_LIBFUNCS
1643 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1645 #if TARGET_MACHO
1646 #undef TARGET_BINDS_LOCAL_P
1647 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1648 #endif
1650 #undef TARGET_MS_BITFIELD_LAYOUT_P
1651 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1653 #undef TARGET_ASM_OUTPUT_MI_THUNK
1654 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1656 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1657 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1659 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1660 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1662 #undef TARGET_REGISTER_MOVE_COST
1663 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1664 #undef TARGET_MEMORY_MOVE_COST
1665 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1666 #undef TARGET_CANNOT_COPY_INSN_P
1667 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1668 #undef TARGET_RTX_COSTS
1669 #define TARGET_RTX_COSTS rs6000_rtx_costs
1670 #undef TARGET_ADDRESS_COST
1671 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1673 #undef TARGET_DWARF_REGISTER_SPAN
1674 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1676 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1677 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1679 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1680 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1682 #undef TARGET_PROMOTE_FUNCTION_MODE
1683 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1685 #undef TARGET_RETURN_IN_MEMORY
1686 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1688 #undef TARGET_RETURN_IN_MSB
1689 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1691 #undef TARGET_SETUP_INCOMING_VARARGS
1692 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1694 /* Always strict argument naming on rs6000. */
1695 #undef TARGET_STRICT_ARGUMENT_NAMING
1696 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1697 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1698 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1699 #undef TARGET_SPLIT_COMPLEX_ARG
1700 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1701 #undef TARGET_MUST_PASS_IN_STACK
1702 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1703 #undef TARGET_PASS_BY_REFERENCE
1704 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1705 #undef TARGET_ARG_PARTIAL_BYTES
1706 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1707 #undef TARGET_FUNCTION_ARG_ADVANCE
1708 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1709 #undef TARGET_FUNCTION_ARG
1710 #define TARGET_FUNCTION_ARG rs6000_function_arg
1711 #undef TARGET_FUNCTION_ARG_BOUNDARY
1712 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1714 #undef TARGET_BUILD_BUILTIN_VA_LIST
1715 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1717 #undef TARGET_EXPAND_BUILTIN_VA_START
1718 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1720 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1721 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1723 #undef TARGET_EH_RETURN_FILTER_MODE
1724 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1727 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1729 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1730 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1732 #undef TARGET_FLOATN_MODE
1733 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1735 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1736 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1738 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1739 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1741 #undef TARGET_MD_ASM_ADJUST
1742 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1744 #undef TARGET_OPTION_OVERRIDE
1745 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1749 rs6000_builtin_vectorized_function
1751 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1752 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1753 rs6000_builtin_md_vectorized_function
1755 #ifdef TARGET_THREAD_SSP_OFFSET
1756 #undef TARGET_STACK_PROTECT_GUARD
1757 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
1758 #endif
1760 #if !TARGET_MACHO
1761 #undef TARGET_STACK_PROTECT_FAIL
1762 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1763 #endif
1765 #ifdef HAVE_AS_TLS
1766 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1767 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1768 #endif
1770 /* Use a 32-bit anchor range. This leads to sequences like:
1772 addis tmp,anchor,high
1773 add dest,tmp,low
1775 where tmp itself acts as an anchor, and can be shared between
1776 accesses to the same 64k page. */
1777 #undef TARGET_MIN_ANCHOR_OFFSET
1778 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1779 #undef TARGET_MAX_ANCHOR_OFFSET
1780 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1781 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1782 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1783 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1784 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1786 #undef TARGET_BUILTIN_RECIPROCAL
1787 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1789 #undef TARGET_EXPAND_TO_RTL_HOOK
1790 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1792 #undef TARGET_INSTANTIATE_DECLS
1793 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1795 #undef TARGET_SECONDARY_RELOAD
1796 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1798 #undef TARGET_LEGITIMATE_ADDRESS_P
1799 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1801 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1802 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1804 #undef TARGET_LRA_P
1805 #define TARGET_LRA_P rs6000_lra_p
1807 #undef TARGET_CAN_ELIMINATE
1808 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1810 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1811 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1813 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1814 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1816 #undef TARGET_TRAMPOLINE_INIT
1817 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1819 #undef TARGET_FUNCTION_VALUE
1820 #define TARGET_FUNCTION_VALUE rs6000_function_value
1822 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1823 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1825 #undef TARGET_OPTION_SAVE
1826 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1828 #undef TARGET_OPTION_RESTORE
1829 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1831 #undef TARGET_OPTION_PRINT
1832 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1834 #undef TARGET_CAN_INLINE_P
1835 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1837 #undef TARGET_SET_CURRENT_FUNCTION
1838 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1840 #undef TARGET_LEGITIMATE_CONSTANT_P
1841 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1843 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1844 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1846 #undef TARGET_CAN_USE_DOLOOP_P
1847 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1849 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1850 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1852 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1853 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1854 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1855 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1856 #undef TARGET_UNWIND_WORD_MODE
1857 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1859 #undef TARGET_OFFLOAD_OPTIONS
1860 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1862 #undef TARGET_C_MODE_FOR_SUFFIX
1863 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1865 #undef TARGET_INVALID_BINARY_OP
1866 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1868 #undef TARGET_OPTAB_SUPPORTED_P
1869 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1871 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1872 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1875 /* Processor table. */
1876 struct rs6000_ptt
1877 {
1878 const char *const name; /* Canonical processor name. */
1879 const enum processor_type processor; /* Processor type enum value. */
1880 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1881 };
1883 static struct rs6000_ptt const processor_target_table[] =
1884 {
1885 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1886 #include "rs6000-cpus.def"
1887 #undef RS6000_CPU
1888 };
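/* Illustrative sketch (editorial, not from rs6000-cpus.def): each RS6000_CPU
   line in the .def file expands to one initializer above, so a hypothetical
   entry such as

     RS6000_CPU ("mycpu", PROCESSOR_POWER8, OPTION_MASK_POWERPC64)

   would contribute { "mycpu", PROCESSOR_POWER8, OPTION_MASK_POWERPC64 },
   to processor_target_table; the name and flag mask here are placeholders.  */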
1890 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1891 name is invalid. */
1893 static int
1894 rs6000_cpu_name_lookup (const char *name)
1895 {
1896 size_t i;
1898 if (name != NULL)
1899 {
1900 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1901 if (! strcmp (name, processor_target_table[i].name))
1902 return (int)i;
1903 }
1905 return -1;
1906 }
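/* Usage sketch (illustrative, not part of GCC): option handling turns a
   -mcpu=/-mtune= argument into a table index, e.g.

     int cpu_index = rs6000_cpu_name_lookup ("power8");
     if (cpu_index >= 0)
       flags = processor_target_table[cpu_index].target_enable;

   where a negative index signals an unrecognized processor name.  */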
1909 /* Return number of consecutive hard regs needed starting at reg REGNO
1910 to hold something of mode MODE.
1911 This is ordinarily the length in words of a value of mode MODE
1912 but can be less for certain modes in special long registers.
1914 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1915 scalar instructions. The upper 32 bits are only available to the
1916 SIMD instructions.
1918 POWER and PowerPC GPRs hold 32 bits worth;
1919 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1921 static int
1922 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1924 unsigned HOST_WIDE_INT reg_size;
1926 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1927 128-bit floating point that can go in vector registers, which has VSX
1928 memory addressing. */
1929 if (FP_REGNO_P (regno))
1930 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1931 ? UNITS_PER_VSX_WORD
1932 : UNITS_PER_FP_WORD);
1934 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1935 reg_size = UNITS_PER_SPE_WORD;
1937 else if (ALTIVEC_REGNO_P (regno))
1938 reg_size = UNITS_PER_ALTIVEC_WORD;
1940 /* The value returned for SCmode in the E500 double case is 2 for
1941 ABI compatibility; storing an SCmode value in a single register
1942 would require function_arg and rs6000_spe_function_arg to handle
1943 SCmode so as to pass the value correctly in a pair of
1944 registers. */
1945 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1946 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1947 reg_size = UNITS_PER_FP_WORD;
1949 else
1950 reg_size = UNITS_PER_WORD;
1952 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
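/* Worked example (illustrative): DFmode is 8 bytes, so on a 32-bit target a
   GPR (reg_size == 4) needs (8 + 4 - 1) / 4 == 2 consecutive registers,
   while an FPR (reg_size == 8) needs (8 + 8 - 1) / 8 == 1.  */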
1955 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1956 MODE. */
1957 static int
1958 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1960 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1962 if (COMPLEX_MODE_P (mode))
1963 mode = GET_MODE_INNER (mode);
1965 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1966 register combinations; we use PTImode where we need to deal with such
1967 quad word operations. Don't allow quad words in the argument or frame
1968 pointer registers, just registers 0..31. */
1969 if (mode == PTImode)
1970 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1971 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1972 && ((regno & 1) == 0));
1974 /* VSX registers that overlap the FPR registers are larger than the FPRs on
1975 non-VSX implementations. Don't allow an item to be split between a FP
1976 register and an Altivec register. Allow TImode in all VSX registers if the
1977 user asked for it. */
1978 if (TARGET_VSX && VSX_REGNO_P (regno)
1979 && (VECTOR_MEM_VSX_P (mode)
1980 || FLOAT128_VECTOR_P (mode)
1981 || reg_addr[mode].scalar_in_vmx_p
1982 || (TARGET_VSX_TIMODE && mode == TImode)
1983 || (TARGET_VADDUQM && mode == V1TImode)))
1985 if (FP_REGNO_P (regno))
1986 return FP_REGNO_P (last_regno);
1988 if (ALTIVEC_REGNO_P (regno))
1990 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1991 return 0;
1993 return ALTIVEC_REGNO_P (last_regno);
1997 /* The GPRs can hold any mode, but values bigger than one register
1998 cannot go past R31. */
1999 if (INT_REGNO_P (regno))
2000 return INT_REGNO_P (last_regno);
2002 /* The float registers (except for VSX vector modes) can only hold floating
2003 modes and DImode. */
2004 if (FP_REGNO_P (regno))
2006 if (FLOAT128_VECTOR_P (mode))
2007 return false;
2009 if (SCALAR_FLOAT_MODE_P (mode)
2010 && (mode != TDmode || (regno % 2) == 0)
2011 && FP_REGNO_P (last_regno))
2012 return 1;
2014 if (GET_MODE_CLASS (mode) == MODE_INT)
2016 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2017 return 1;
2019 if (TARGET_VSX_SMALL_INTEGER && mode == SImode)
2020 return 1;
2023 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2024 && PAIRED_VECTOR_MODE (mode))
2025 return 1;
2027 return 0;
2030 /* The CR register can only hold CC modes. */
2031 if (CR_REGNO_P (regno))
2032 return GET_MODE_CLASS (mode) == MODE_CC;
2034 if (CA_REGNO_P (regno))
2035 return mode == Pmode || mode == SImode;
2037 /* AltiVec modes can go only in AltiVec registers. */
2038 if (ALTIVEC_REGNO_P (regno))
2039 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2040 || mode == V1TImode);
2042 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2043 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2044 return 1;
2046 /* We cannot put non-VSX TImode or PTImode anywhere except in general
2047 registers, and the value must fit within the register set. */
2049 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
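/* Usage sketch (illustrative): callers probe a (regno, mode) pair before
   allocating, e.g.

     if (rs6000_hard_regno_mode_ok (FIRST_ALTIVEC_REGNO, V4SImode))
       ... V4SImode may start in the first AltiVec register ...

   Per the checks above, CC modes are accepted only by the CR registers.  */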
2052 /* Print interesting facts about registers. */
2053 static void
2054 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2056 int r, m;
2058 for (r = first_regno; r <= last_regno; ++r)
2060 const char *comma = "";
2061 int len;
2063 if (first_regno == last_regno)
2064 fprintf (stderr, "%s:\t", reg_name);
2065 else
2066 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2068 len = 8;
2069 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2070 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2072 if (len > 70)
2074 fprintf (stderr, ",\n\t");
2075 len = 8;
2076 comma = "";
2079 if (rs6000_hard_regno_nregs[m][r] > 1)
2080 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2081 rs6000_hard_regno_nregs[m][r]);
2082 else
2083 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2085 comma = ", ";
2088 if (call_used_regs[r])
2090 if (len > 70)
2092 fprintf (stderr, ",\n\t");
2093 len = 8;
2094 comma = "";
2097 len += fprintf (stderr, "%s%s", comma, "call-used");
2098 comma = ", ";
2101 if (fixed_regs[r])
2103 if (len > 70)
2105 fprintf (stderr, ",\n\t");
2106 len = 8;
2107 comma = "";
2110 len += fprintf (stderr, "%s%s", comma, "fixed");
2111 comma = ", ";
2114 if (len > 70)
2116 fprintf (stderr, ",\n\t");
2117 comma = "";
2120 len += fprintf (stderr, "%sreg-class = %s", comma,
2121 reg_class_names[(int)rs6000_regno_regclass[r]]);
2122 comma = ", ";
2124 if (len > 70)
2126 fprintf (stderr, ",\n\t");
2127 comma = "";
2130 fprintf (stderr, "%sregno = %d\n", comma, r);
2134 static const char *
2135 rs6000_debug_vector_unit (enum rs6000_vector v)
2137 const char *ret;
2139 switch (v)
2141 case VECTOR_NONE: ret = "none"; break;
2142 case VECTOR_ALTIVEC: ret = "altivec"; break;
2143 case VECTOR_VSX: ret = "vsx"; break;
2144 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2145 case VECTOR_PAIRED: ret = "paired"; break;
2146 case VECTOR_SPE: ret = "spe"; break;
2147 case VECTOR_OTHER: ret = "other"; break;
2148 default: ret = "unknown"; break;
2151 return ret;
2154 /* Inner function printing just the address mask for a particular reload
2155 register class. */
2156 DEBUG_FUNCTION char *
2157 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2159 static char ret[8];
2160 char *p = ret;
2162 if ((mask & RELOAD_REG_VALID) != 0)
2163 *p++ = 'v';
2164 else if (keep_spaces)
2165 *p++ = ' ';
2167 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2168 *p++ = 'm';
2169 else if (keep_spaces)
2170 *p++ = ' ';
2172 if ((mask & RELOAD_REG_INDEXED) != 0)
2173 *p++ = 'i';
2174 else if (keep_spaces)
2175 *p++ = ' ';
2177 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2178 *p++ = 'O';
2179 else if ((mask & RELOAD_REG_OFFSET) != 0)
2180 *p++ = 'o';
2181 else if (keep_spaces)
2182 *p++ = ' ';
2184 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2185 *p++ = '+';
2186 else if (keep_spaces)
2187 *p++ = ' ';
2189 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2190 *p++ = '+';
2191 else if (keep_spaces)
2192 *p++ = ' ';
2194 if ((mask & RELOAD_REG_AND_M16) != 0)
2195 *p++ = '&';
2196 else if (keep_spaces)
2197 *p++ = ' ';
2199 *p = '\0';
2201 return ret;
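/* Example (illustrative): a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED
   and RELOAD_REG_OFFSET set, printed with keep_spaces, comes back as
   "v io   " -- one column per flag, with blank columns for the unset
   MULTIPLE, PRE_INCDEC, PRE_MODIFY and AND_M16 bits so that the
   -mdebug=reg rows line up.  */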
2204 /* Print the address masks in a human readable fashion. */
2205 DEBUG_FUNCTION void
2206 rs6000_debug_print_mode (ssize_t m)
2208 ssize_t rc;
2209 int spaces = 0;
2210 bool fuse_extra_p;
2212 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2213 for (rc = 0; rc < N_RELOAD_REG; rc++)
2214 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2215 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2217 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2218 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2219 fprintf (stderr, " Reload=%c%c",
2220 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2221 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2222 else
2223 spaces += sizeof (" Reload=sl") - 1;
2225 if (reg_addr[m].scalar_in_vmx_p)
2227 fprintf (stderr, "%*s Upper=y", spaces, "");
2228 spaces = 0;
2230 else
2231 spaces += sizeof (" Upper=y") - 1;
2233 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2234 || reg_addr[m].fused_toc);
2235 if (!fuse_extra_p)
2237 for (rc = 0; rc < N_RELOAD_REG; rc++)
2239 if (rc != RELOAD_REG_ANY)
2241 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2243 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2244 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2245 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2247 fuse_extra_p = true;
2248 break;
2254 if (fuse_extra_p)
2256 fprintf (stderr, "%*s Fuse:", spaces, "");
2257 spaces = 0;
2259 for (rc = 0; rc < N_RELOAD_REG; rc++)
2261 if (rc != RELOAD_REG_ANY)
2263 char load, store;
2265 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2266 load = 'l';
2267 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2268 load = 'L';
2269 else
2270 load = '-';
2272 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2273 store = 's';
2274 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2275 store = 'S';
2276 else
2277 store = '-';
2279 if (load == '-' && store == '-')
2280 spaces += 5;
2281 else
2283 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2284 reload_reg_map[rc].name[0], load, store);
2285 spaces = 0;
2290 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2292 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2293 spaces = 0;
2295 else
2296 spaces += sizeof (" P8gpr") - 1;
2298 if (reg_addr[m].fused_toc)
2300 fprintf (stderr, "%*sToc", (spaces + 1), "");
2301 spaces = 0;
2303 else
2304 spaces += sizeof (" Toc") - 1;
2306 else
2307 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2309 if (rs6000_vector_unit[m] != VECTOR_NONE
2310 || rs6000_vector_mem[m] != VECTOR_NONE)
2312 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2313 spaces, "",
2314 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2315 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2318 fputs ("\n", stderr);
2321 #define DEBUG_FMT_ID "%-32s= "
2322 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2323 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2324 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
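/* Example (illustrative): fprintf (stderr, DEBUG_FMT_S, "abi", "aix")
   expands to fprintf (stderr, "%-32s= %s\n", "abi", "aix"), printing the
   key left-justified in a 32-character field followed by "= aix".  */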
2326 /* Print various interesting information with -mdebug=reg. */
2327 static void
2328 rs6000_debug_reg_global (void)
2330 static const char *const tf[2] = { "false", "true" };
2331 const char *nl = (const char *)0;
2332 int m;
2333 size_t m1, m2, v;
2334 char costly_num[20];
2335 char nop_num[20];
2336 char flags_buffer[40];
2337 const char *costly_str;
2338 const char *nop_str;
2339 const char *trace_str;
2340 const char *abi_str;
2341 const char *cmodel_str;
2342 struct cl_target_option cl_opts;
2344 /* Modes we want tieable information on. */
2345 static const machine_mode print_tieable_modes[] = {
2346 QImode,
2347 HImode,
2348 SImode,
2349 DImode,
2350 TImode,
2351 PTImode,
2352 SFmode,
2353 DFmode,
2354 TFmode,
2355 IFmode,
2356 KFmode,
2357 SDmode,
2358 DDmode,
2359 TDmode,
2360 V8QImode,
2361 V4HImode,
2362 V2SImode,
2363 V16QImode,
2364 V8HImode,
2365 V4SImode,
2366 V2DImode,
2367 V1TImode,
2368 V32QImode,
2369 V16HImode,
2370 V8SImode,
2371 V4DImode,
2372 V2TImode,
2373 V2SFmode,
2374 V4SFmode,
2375 V2DFmode,
2376 V8SFmode,
2377 V4DFmode,
2378 CCmode,
2379 CCUNSmode,
2380 CCEQmode,
2383 /* Virtual regs we are interested in. */
2384 const static struct {
2385 int regno; /* register number. */
2386 const char *name; /* register name. */
2387 } virtual_regs[] = {
2388 { STACK_POINTER_REGNUM, "stack pointer:" },
2389 { TOC_REGNUM, "toc: " },
2390 { STATIC_CHAIN_REGNUM, "static chain: " },
2391 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2392 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2393 { ARG_POINTER_REGNUM, "arg pointer: " },
2394 { FRAME_POINTER_REGNUM, "frame pointer:" },
2395 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2396 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2397 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2398 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2399 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2400 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2401 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2402 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2403 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2406 fputs ("\nHard register information:\n", stderr);
2407 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2408 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2409 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2410 LAST_ALTIVEC_REGNO,
2411 "vs");
2412 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2413 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2414 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2415 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2416 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2417 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2418 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2419 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2421 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2422 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2423 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2425 fprintf (stderr,
2426 "\n"
2427 "d reg_class = %s\n"
2428 "f reg_class = %s\n"
2429 "v reg_class = %s\n"
2430 "wa reg_class = %s\n"
2431 "wb reg_class = %s\n"
2432 "wd reg_class = %s\n"
2433 "we reg_class = %s\n"
2434 "wf reg_class = %s\n"
2435 "wg reg_class = %s\n"
2436 "wh reg_class = %s\n"
2437 "wi reg_class = %s\n"
2438 "wj reg_class = %s\n"
2439 "wk reg_class = %s\n"
2440 "wl reg_class = %s\n"
2441 "wm reg_class = %s\n"
2442 "wo reg_class = %s\n"
2443 "wp reg_class = %s\n"
2444 "wq reg_class = %s\n"
2445 "wr reg_class = %s\n"
2446 "ws reg_class = %s\n"
2447 "wt reg_class = %s\n"
2448 "wu reg_class = %s\n"
2449 "wv reg_class = %s\n"
2450 "ww reg_class = %s\n"
2451 "wx reg_class = %s\n"
2452 "wy reg_class = %s\n"
2453 "wz reg_class = %s\n"
2454 "wH reg_class = %s\n"
2455 "wI reg_class = %s\n"
2456 "wJ reg_class = %s\n"
2457 "wK reg_class = %s\n"
2458 "\n",
2459 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2460 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2461 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2462 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2463 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2464 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2465 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2466 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2467 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2468 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2469 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2470 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2471 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2472 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2473 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2474 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2475 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2476 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2478 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2479 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2480 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2481 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2482 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2483 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2484 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2485 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2486 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2487 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2488 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2489 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2491 nl = "\n";
2492 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2493 rs6000_debug_print_mode (m);
2495 fputs ("\n", stderr);
2497 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2499 machine_mode mode1 = print_tieable_modes[m1];
2500 bool first_time = true;
2502 nl = (const char *)0;
2503 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2505 machine_mode mode2 = print_tieable_modes[m2];
2506 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2508 if (first_time)
2510 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2511 nl = "\n";
2512 first_time = false;
2515 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2519 if (!first_time)
2520 fputs ("\n", stderr);
2523 if (nl)
2524 fputs (nl, stderr);
2526 if (rs6000_recip_control)
2528 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2530 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2531 if (rs6000_recip_bits[m])
2533 fprintf (stderr,
2534 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2535 GET_MODE_NAME (m),
2536 (RS6000_RECIP_AUTO_RE_P (m)
2537 ? "auto"
2538 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2539 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2540 ? "auto"
2541 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2544 fputs ("\n", stderr);
2547 if (rs6000_cpu_index >= 0)
2549 const char *name = processor_target_table[rs6000_cpu_index].name;
2550 HOST_WIDE_INT flags
2551 = processor_target_table[rs6000_cpu_index].target_enable;
2553 sprintf (flags_buffer, "-mcpu=%s flags", name);
2554 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2556 else
2557 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2559 if (rs6000_tune_index >= 0)
2561 const char *name = processor_target_table[rs6000_tune_index].name;
2562 HOST_WIDE_INT flags
2563 = processor_target_table[rs6000_tune_index].target_enable;
2565 sprintf (flags_buffer, "-mtune=%s flags", name);
2566 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2568 else
2569 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2571 cl_target_option_save (&cl_opts, &global_options);
2572 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2573 rs6000_isa_flags);
2575 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2576 rs6000_isa_flags_explicit);
2578 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2579 rs6000_builtin_mask);
2581 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2583 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2584 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2586 switch (rs6000_sched_costly_dep)
2588 case max_dep_latency:
2589 costly_str = "max_dep_latency";
2590 break;
2592 case no_dep_costly:
2593 costly_str = "no_dep_costly";
2594 break;
2596 case all_deps_costly:
2597 costly_str = "all_deps_costly";
2598 break;
2600 case true_store_to_load_dep_costly:
2601 costly_str = "true_store_to_load_dep_costly";
2602 break;
2604 case store_to_load_dep_costly:
2605 costly_str = "store_to_load_dep_costly";
2606 break;
2608 default:
2609 costly_str = costly_num;
2610 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2611 break;
2614 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2616 switch (rs6000_sched_insert_nops)
2618 case sched_finish_regroup_exact:
2619 nop_str = "sched_finish_regroup_exact";
2620 break;
2622 case sched_finish_pad_groups:
2623 nop_str = "sched_finish_pad_groups";
2624 break;
2626 case sched_finish_none:
2627 nop_str = "sched_finish_none";
2628 break;
2630 default:
2631 nop_str = nop_num;
2632 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2633 break;
2636 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2638 switch (rs6000_sdata)
2640 default:
2641 case SDATA_NONE:
2642 break;
2644 case SDATA_DATA:
2645 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2646 break;
2648 case SDATA_SYSV:
2649 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2650 break;
2652 case SDATA_EABI:
2653 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2654 break;
2658 switch (rs6000_traceback)
2660 case traceback_default: trace_str = "default"; break;
2661 case traceback_none: trace_str = "none"; break;
2662 case traceback_part: trace_str = "part"; break;
2663 case traceback_full: trace_str = "full"; break;
2664 default: trace_str = "unknown"; break;
2667 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2669 switch (rs6000_current_cmodel)
2671 case CMODEL_SMALL: cmodel_str = "small"; break;
2672 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2673 case CMODEL_LARGE: cmodel_str = "large"; break;
2674 default: cmodel_str = "unknown"; break;
2677 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2679 switch (rs6000_current_abi)
2681 case ABI_NONE: abi_str = "none"; break;
2682 case ABI_AIX: abi_str = "aix"; break;
2683 case ABI_ELFv2: abi_str = "ELFv2"; break;
2684 case ABI_V4: abi_str = "V4"; break;
2685 case ABI_DARWIN: abi_str = "darwin"; break;
2686 default: abi_str = "unknown"; break;
2689 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2691 if (rs6000_altivec_abi)
2692 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2694 if (rs6000_spe_abi)
2695 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2697 if (rs6000_darwin64_abi)
2698 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2700 if (rs6000_float_gprs)
2701 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2703 fprintf (stderr, DEBUG_FMT_S, "fprs",
2704 (TARGET_FPRS ? "true" : "false"));
2706 fprintf (stderr, DEBUG_FMT_S, "single_float",
2707 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2709 fprintf (stderr, DEBUG_FMT_S, "double_float",
2710 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2712 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2713 (TARGET_SOFT_FLOAT ? "true" : "false"));
2715 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2716 (TARGET_E500_SINGLE ? "true" : "false"));
2718 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2719 (TARGET_E500_DOUBLE ? "true" : "false"));
2721 if (TARGET_LINK_STACK)
2722 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2724 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2726 if (TARGET_P8_FUSION)
2728 char options[80];
2730 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2731 if (TARGET_TOC_FUSION)
2732 strcat (options, ", toc");
2734 if (TARGET_P8_FUSION_SIGN)
2735 strcat (options, ", sign");
2737 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2740 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2741 TARGET_SECURE_PLT ? "secure" : "bss");
2742 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2743 aix_struct_return ? "aix" : "sysv");
2744 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2745 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2746 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2747 tf[!!rs6000_align_branch_targets]);
2748 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2749 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2750 rs6000_long_double_type_size);
2751 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2752 (int)rs6000_sched_restricted_insns_priority);
2753 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2754 (int)END_BUILTINS);
2755 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2756 (int)RS6000_BUILTIN_COUNT);
2758 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2759 (int)TARGET_FLOAT128_ENABLE_TYPE);
2761 if (TARGET_VSX)
2762 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2763 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2765 if (TARGET_DIRECT_MOVE_128)
2766 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2767 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
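/* Usage note (illustrative): this dump is triggered by -mdebug=reg, e.g.

     gcc -S -mcpu=power8 -mdebug=reg foo.c 2> reg.log

   which sends the register, mode, and option tables above to stderr.  */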
2771 /* Update the addr mask bits in reg_addr to help secondary reload and the
2772 legitimate address support figure out the appropriate addressing to
2773 use. */
2775 static void
2776 rs6000_setup_reg_addr_masks (void)
2778 ssize_t rc, reg, m, nregs;
2779 addr_mask_type any_addr_mask, addr_mask;
2781 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2783 machine_mode m2 = (machine_mode) m;
2784 bool complex_p = false;
2785 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2786 size_t msize;
2788 if (COMPLEX_MODE_P (m2))
2790 complex_p = true;
2791 m2 = GET_MODE_INNER (m2);
2794 msize = GET_MODE_SIZE (m2);
2796 /* SDmode is special in that we want to access it only via REG+REG
2797 addressing on power7 and above, since we want to use the LFIWZX and
2798 STFIWZX instructions to load it. */
2799 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2801 any_addr_mask = 0;
2802 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2804 addr_mask = 0;
2805 reg = reload_reg_map[rc].reg;
2807 /* Can mode values go in the GPR/FPR/Altivec registers? */
2808 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2810 bool small_int_vsx_p = (small_int_p
2811 && (rc == RELOAD_REG_FPR
2812 || rc == RELOAD_REG_VMX));
2814 nregs = rs6000_hard_regno_nregs[m][reg];
2815 addr_mask |= RELOAD_REG_VALID;
2817 /* Indicate if the mode takes more than 1 physical register. If
2818 it takes a single register, indicate it can do REG+REG
2819 addressing. Small integers in VSX registers can only do
2820 REG+REG addressing. */
2821 if (small_int_vsx_p)
2822 addr_mask |= RELOAD_REG_INDEXED;
2823 else if (nregs > 1 || m == BLKmode || complex_p)
2824 addr_mask |= RELOAD_REG_MULTIPLE;
2825 else
2826 addr_mask |= RELOAD_REG_INDEXED;
2828 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2829 addressing. Restrict addressing on SPE for 64-bit types
2830 because of the SUBREG hackery used to address 64-bit floats in
2831 '32-bit' GPRs. If we allow scalars into Altivec registers,
2832 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2834 if (TARGET_UPDATE
2835 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2836 && msize <= 8
2837 && !VECTOR_MODE_P (m2)
2838 && !FLOAT128_VECTOR_P (m2)
2839 && !complex_p
2840 && !small_int_vsx_p
2841 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2842 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2843 && !(TARGET_E500_DOUBLE && msize == 8))
2845 addr_mask |= RELOAD_REG_PRE_INCDEC;
2847 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2848 we don't allow PRE_MODIFY for some multi-register
2849 operations. */
2850 switch (m)
2852 default:
2853 addr_mask |= RELOAD_REG_PRE_MODIFY;
2854 break;
2856 case DImode:
2857 if (TARGET_POWERPC64)
2858 addr_mask |= RELOAD_REG_PRE_MODIFY;
2859 break;
2861 case DFmode:
2862 case DDmode:
2863 if (TARGET_DF_INSN)
2864 addr_mask |= RELOAD_REG_PRE_MODIFY;
2865 break;
2870 /* GPR and FPR registers can do REG+OFFSET addressing, except
2871 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2872 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2873 if ((addr_mask != 0) && !indexed_only_p
2874 && msize <= 8
2875 && (rc == RELOAD_REG_GPR
2876 || ((msize == 8 || m2 == SFmode)
2877 && (rc == RELOAD_REG_FPR
2878 || (rc == RELOAD_REG_VMX
2879 && TARGET_P9_DFORM_SCALAR)))))
2880 addr_mask |= RELOAD_REG_OFFSET;
2882 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2883 instructions are enabled. The offset for 128-bit VSX registers is
2884 only 12 bits. While GPRs can handle the full offset range, VSX
2885 registers can only handle the restricted range. */
2886 else if ((addr_mask != 0) && !indexed_only_p
2887 && msize == 16 && TARGET_P9_DFORM_VECTOR
2888 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2889 || (m2 == TImode && TARGET_VSX_TIMODE)))
2891 addr_mask |= RELOAD_REG_OFFSET;
2892 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2893 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2896 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2897 addressing on 128-bit types. */
2898 if (rc == RELOAD_REG_VMX && msize == 16
2899 && (addr_mask & RELOAD_REG_VALID) != 0)
2900 addr_mask |= RELOAD_REG_AND_M16;
2902 reg_addr[m].addr_mask[rc] = addr_mask;
2903 any_addr_mask |= addr_mask;
2906 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
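/* Illustrative consequence (assuming a 64-bit VSX target with the default
   -mupdate and without the ISA 3.0 D-form options): SImode in the GPR class
   collects RELOAD_REG_VALID, RELOAD_REG_INDEXED, RELOAD_REG_OFFSET,
   RELOAD_REG_PRE_INCDEC and RELOAD_REG_PRE_MODIFY, while V4SImode in the
   VMX class gets only RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_AND_M16, i.e. vectors are indexed-only with the AltiVec
   ignore-low-4-bits address semantics.  */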
2911 /* Initialize the various global tables that are based on register size. */
2912 static void
2913 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2915 ssize_t r, m, c;
2916 int align64;
2917 int align32;
2919 /* Precalculate REGNO_REG_CLASS. */
2920 rs6000_regno_regclass[0] = GENERAL_REGS;
2921 for (r = 1; r < 32; ++r)
2922 rs6000_regno_regclass[r] = BASE_REGS;
2924 for (r = 32; r < 64; ++r)
2925 rs6000_regno_regclass[r] = FLOAT_REGS;
2927 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2928 rs6000_regno_regclass[r] = NO_REGS;
2930 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2931 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2933 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2934 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2935 rs6000_regno_regclass[r] = CR_REGS;
2937 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2938 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2939 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2940 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2941 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2942 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2943 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2944 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2945 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2946 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2947 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2948 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2950 /* Precalculate the mapping from register class to the simpler reload
2951 register class. We don't need all of the register classes that are
2952 combinations of different classes, just the simple ones that have constraint letters. */
2953 for (c = 0; c < N_REG_CLASSES; c++)
2954 reg_class_to_reg_type[c] = NO_REG_TYPE;
2956 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2957 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2958 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2959 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2960 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2961 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2962 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2963 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2964 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2965 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2966 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2967 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2969 if (TARGET_VSX)
2971 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2972 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2974 else
2976 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2977 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2980 /* Precalculate the valid memory formats as well as the vector information;
2981 this must be set up before the rs6000_hard_regno_nregs_internal calls
2982 below. */
2983 gcc_assert ((int)VECTOR_NONE == 0);
2984 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2985 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2987 gcc_assert ((int)CODE_FOR_nothing == 0);
2988 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2990 gcc_assert ((int)NO_REGS == 0);
2991 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2993 /* The VSX hardware allows native alignment for vectors; these settings
2994 control whether the compiler believes it can rely on that or must still assume 128-bit alignment. */
2995 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2997 align64 = 64;
2998 align32 = 32;
3000 else
3002 align64 = 128;
3003 align32 = 128;
3006 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3007 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3008 if (TARGET_FLOAT128_TYPE)
3010 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3011 rs6000_vector_align[KFmode] = 128;
3013 if (FLOAT128_IEEE_P (TFmode))
3015 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3016 rs6000_vector_align[TFmode] = 128;
3020 /* V2DF mode, VSX only. */
3021 if (TARGET_VSX)
3023 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3024 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3025 rs6000_vector_align[V2DFmode] = align64;
3028 /* V4SF mode, either VSX or Altivec. */
3029 if (TARGET_VSX)
3031 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3032 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3033 rs6000_vector_align[V4SFmode] = align32;
3035 else if (TARGET_ALTIVEC)
3037 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3038 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3039 rs6000_vector_align[V4SFmode] = align32;
3042 /* V16QImode, V8HImode, V4SImode: arithmetic is Altivec only, but the loads
3043 and stores may use VSX. */
3044 if (TARGET_ALTIVEC)
3046 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3047 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3048 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3049 rs6000_vector_align[V4SImode] = align32;
3050 rs6000_vector_align[V8HImode] = align32;
3051 rs6000_vector_align[V16QImode] = align32;
3053 if (TARGET_VSX)
3055 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3056 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3057 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3059 else
3061 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3062 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3063 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3067 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.
3068 Allow it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3069 if (TARGET_VSX)
3071 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3072 rs6000_vector_unit[V2DImode]
3073 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3074 rs6000_vector_align[V2DImode] = align64;
3076 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3077 rs6000_vector_unit[V1TImode]
3078 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3079 rs6000_vector_align[V1TImode] = 128;
3082 /* DFmode, see if we want to use the VSX unit. Memory is handled
3083 differently, so don't set rs6000_vector_mem. */
3084 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3086 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3087 rs6000_vector_align[DFmode] = 64;
3090 /* SFmode, see if we want to use the VSX unit. */
3091 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3093 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3094 rs6000_vector_align[SFmode] = 32;
3097 /* Allow TImode in VSX registers and set the VSX memory macros. */
3098 if (TARGET_VSX && TARGET_VSX_TIMODE)
3100 rs6000_vector_mem[TImode] = VECTOR_VSX;
3101 rs6000_vector_align[TImode] = align64;
3104 /* TODO add SPE and paired floating point vector support. */
3106 /* Register class constraints for the constraints that depend on compile
3107 switches. When the VSX code was added, different constraints were added
3108 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3109 of the VSX registers are used. The register classes for scalar floating
3110 point types are set based on whether we allow that type into the upper
3111 (Altivec) registers. GCC has register classes to target the Altivec
3112 registers for load/store operations, to select using a VSX memory
3113 operation instead of the traditional floating point operation. The
3114 constraints are:
3116 d - Register class to use with traditional DFmode instructions.
3117 f - Register class to use with traditional SFmode instructions.
3118 v - Altivec register.
3119 wa - Any VSX register.
3120 wc - Reserved to represent individual CR bits (used in LLVM).
3121 wd - Preferred register class for V2DFmode.
3122 wf - Preferred register class for V4SFmode.
3123 wg - Float register for power6x move insns.
3124 wh - FP register for direct move instructions.
3125 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3126 wj - FP or VSX register to hold 64-bit integers for direct moves.
3127 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3128 wl - Float register if we can do 32-bit signed int loads.
3129 wm - VSX register for ISA 2.07 direct move operations.
3130 wn - always NO_REGS.
3131 wr - GPR if 64-bit mode is permitted.
3132 ws - Register class to do ISA 2.06 DF operations.
3133 wt - VSX register for TImode in VSX registers.
3134 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3135 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3136 ww - Register class to do SF conversions in with VSX operations.
3137 wx - Float register if we can do 32-bit int stores.
3138 wy - Register class to do ISA 2.07 SF operations.
3139 wz - Float register if we can do 32-bit unsigned int loads.
3140 wH - Altivec register if SImode is allowed in VSX registers.
3141 wI - VSX register if SImode is allowed in VSX registers.
3142 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3143 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
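/* Usage sketch (illustrative, editorial): these letters surface in the
   machine description and in user inline asm, e.g.

     double vsx_add (double a, double b)
     {
       double r;
       __asm__ ("xsadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
       return r;
     }

   where "wa" requests any VSX register and the %x modifier prints the full
   VSX register number.  */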
3145 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3146 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3148 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3149 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3151 if (TARGET_VSX)
3153 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3154 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3155 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3157 if (TARGET_VSX_TIMODE)
3158 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3160 if (TARGET_UPPER_REGS_DF) /* DFmode */
3162 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3163 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3165 else
3166 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3168 if (TARGET_UPPER_REGS_DI) /* DImode */
3169 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3170 else
3171 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3174 /* Add conditional constraints based on various options, to allow us to
3175 collapse multiple insn patterns. */
3176 if (TARGET_ALTIVEC)
3177 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3179 if (TARGET_MFPGPR) /* DFmode */
3180 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3182 if (TARGET_LFIWAX)
3183 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3185 if (TARGET_DIRECT_MOVE)
3187 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3188 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3189 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3190 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3191 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3192 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3195 if (TARGET_POWERPC64)
3196 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3198 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3200 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3201 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3202 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3204 else if (TARGET_P8_VECTOR)
3206 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3207 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3209 else if (TARGET_VSX)
3210 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3212 if (TARGET_STFIWX)
3213 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3215 if (TARGET_LFIWZX)
3216 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3218 if (TARGET_FLOAT128_TYPE)
3220 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3221 if (FLOAT128_IEEE_P (TFmode))
3222 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3225 /* Support for new D-form instructions. */
3226 if (TARGET_P9_DFORM_SCALAR)
3227 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3229 /* Support for ISA 3.0 (power9) vectors. */
3230 if (TARGET_P9_VECTOR)
3231 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3233 /* Support for new direct moves (ISA 3.0 + 64bit). */
3234 if (TARGET_DIRECT_MOVE_128)
3235 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3237 /* Support small integers in VSX registers. */
3238 if (TARGET_VSX_SMALL_INTEGER)
3240 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3241 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3242 if (TARGET_P9_VECTOR)
3244 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3245 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3249 /* Set up the reload helper and direct move functions. */
3250 if (TARGET_VSX || TARGET_ALTIVEC)
3252 if (TARGET_64BIT)
3254 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3255 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3256 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3257 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3258 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3259 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3260 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3261 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3262 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3263 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3264 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3265 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3266 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3267 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3268 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3269 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3270 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3271 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3272 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3273 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3275 if (FLOAT128_VECTOR_P (KFmode))
3277 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3278 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3281 if (FLOAT128_VECTOR_P (TFmode))
3283 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3284 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3287 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3288 available. */
3289 if (TARGET_NO_SDMODE_STACK)
3291 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3292 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3295 if (TARGET_VSX_TIMODE)
3297 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3298 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3301 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3303 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3304 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3305 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3306 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3307 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3308 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3309 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3310 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3311 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3313 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3314 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3315 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3316 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3317 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3318 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3319 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3320 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3321 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3323 if (FLOAT128_VECTOR_P (KFmode))
3325 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3326 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3329 if (FLOAT128_VECTOR_P (TFmode))
3331 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3332 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3336 else
3338 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3339 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3340 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3341 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3342 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3343 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3344 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3345 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3346 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3347 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3348 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3349 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3350 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3351 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3352 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3353 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3354 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3355 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3356 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3357 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3359 if (FLOAT128_VECTOR_P (KFmode))
3361 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3362 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3365 if (FLOAT128_IEEE_P (TFmode))
3367 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3368 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3371 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3372 available. */
3373 if (TARGET_NO_SDMODE_STACK)
3375 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3376 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3379 if (TARGET_VSX_TIMODE)
3381 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3382 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3385 if (TARGET_DIRECT_MOVE)
3387 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3388 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3389 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3393 if (TARGET_UPPER_REGS_DF)
3394 reg_addr[DFmode].scalar_in_vmx_p = true;
3396 if (TARGET_UPPER_REGS_DI)
3397 reg_addr[DImode].scalar_in_vmx_p = true;
3399 if (TARGET_UPPER_REGS_SF)
3400 reg_addr[SFmode].scalar_in_vmx_p = true;
3402 if (TARGET_VSX_SMALL_INTEGER)
3403 reg_addr[SImode].scalar_in_vmx_p = true;
3406 /* Setup the fusion operations. */
3407 if (TARGET_P8_FUSION)
3409 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3410 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3411 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3412 if (TARGET_64BIT)
3413 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3416 if (TARGET_P9_FUSION)
3418 struct fuse_insns {
3419 enum machine_mode mode; /* mode of the fused type. */
3420 enum machine_mode pmode; /* pointer mode. */
3421 enum rs6000_reload_reg_type rtype; /* register type. */
3422 enum insn_code load; /* load insn. */
3423 enum insn_code store; /* store insn. */
3426 static const struct fuse_insns addis_insns[] = {
3427 { SFmode, DImode, RELOAD_REG_FPR,
3428 CODE_FOR_fusion_fpr_di_sf_load,
3429 CODE_FOR_fusion_fpr_di_sf_store },
3431 { SFmode, SImode, RELOAD_REG_FPR,
3432 CODE_FOR_fusion_fpr_si_sf_load,
3433 CODE_FOR_fusion_fpr_si_sf_store },
3435 { DFmode, DImode, RELOAD_REG_FPR,
3436 CODE_FOR_fusion_fpr_di_df_load,
3437 CODE_FOR_fusion_fpr_di_df_store },
3439 { DFmode, SImode, RELOAD_REG_FPR,
3440 CODE_FOR_fusion_fpr_si_df_load,
3441 CODE_FOR_fusion_fpr_si_df_store },
3443 { DImode, DImode, RELOAD_REG_FPR,
3444 CODE_FOR_fusion_fpr_di_di_load,
3445 CODE_FOR_fusion_fpr_di_di_store },
3447 { DImode, SImode, RELOAD_REG_FPR,
3448 CODE_FOR_fusion_fpr_si_di_load,
3449 CODE_FOR_fusion_fpr_si_di_store },
3451 { QImode, DImode, RELOAD_REG_GPR,
3452 CODE_FOR_fusion_gpr_di_qi_load,
3453 CODE_FOR_fusion_gpr_di_qi_store },
3455 { QImode, SImode, RELOAD_REG_GPR,
3456 CODE_FOR_fusion_gpr_si_qi_load,
3457 CODE_FOR_fusion_gpr_si_qi_store },
3459 { HImode, DImode, RELOAD_REG_GPR,
3460 CODE_FOR_fusion_gpr_di_hi_load,
3461 CODE_FOR_fusion_gpr_di_hi_store },
3463 { HImode, SImode, RELOAD_REG_GPR,
3464 CODE_FOR_fusion_gpr_si_hi_load,
3465 CODE_FOR_fusion_gpr_si_hi_store },
3467 { SImode, DImode, RELOAD_REG_GPR,
3468 CODE_FOR_fusion_gpr_di_si_load,
3469 CODE_FOR_fusion_gpr_di_si_store },
3471 { SImode, SImode, RELOAD_REG_GPR,
3472 CODE_FOR_fusion_gpr_si_si_load,
3473 CODE_FOR_fusion_gpr_si_si_store },
3475 { SFmode, DImode, RELOAD_REG_GPR,
3476 CODE_FOR_fusion_gpr_di_sf_load,
3477 CODE_FOR_fusion_gpr_di_sf_store },
3479 { SFmode, SImode, RELOAD_REG_GPR,
3480 CODE_FOR_fusion_gpr_si_sf_load,
3481 CODE_FOR_fusion_gpr_si_sf_store },
3483 { DImode, DImode, RELOAD_REG_GPR,
3484 CODE_FOR_fusion_gpr_di_di_load,
3485 CODE_FOR_fusion_gpr_di_di_store },
3487 { DFmode, DImode, RELOAD_REG_GPR,
3488 CODE_FOR_fusion_gpr_di_df_load,
3489 CODE_FOR_fusion_gpr_di_df_store },
3492 enum machine_mode cur_pmode = Pmode;
3493 size_t i;
3495 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3497 enum machine_mode xmode = addis_insns[i].mode;
3498 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3500 if (addis_insns[i].pmode != cur_pmode)
3501 continue;
3503 if (rtype == RELOAD_REG_FPR
3504 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3505 continue;
3507 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3508 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3512 /* Note which types support fusing a TOC setup with a memory insn. We only do
3513 fused TOCs for medium/large code models. */
3514 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3515 && (TARGET_CMODEL != CMODEL_SMALL))
3517 reg_addr[QImode].fused_toc = true;
3518 reg_addr[HImode].fused_toc = true;
3519 reg_addr[SImode].fused_toc = true;
3520 reg_addr[DImode].fused_toc = true;
3521 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3523 if (TARGET_SINGLE_FLOAT)
3524 reg_addr[SFmode].fused_toc = true;
3525 if (TARGET_DOUBLE_FLOAT)
3526 reg_addr[DFmode].fused_toc = true;
3530 /* Precalculate HARD_REGNO_NREGS. */
3531 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3532 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3533 rs6000_hard_regno_nregs[m][r]
3534 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3536 /* Precalculate HARD_REGNO_MODE_OK. */
3537 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3538 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3539 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3540 rs6000_hard_regno_mode_ok_p[m][r] = true;
3542 /* Precalculate CLASS_MAX_NREGS sizes. */
3543 for (c = 0; c < LIM_REG_CLASSES; ++c)
3545 int reg_size;
3547 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3548 reg_size = UNITS_PER_VSX_WORD;
3550 else if (c == ALTIVEC_REGS)
3551 reg_size = UNITS_PER_ALTIVEC_WORD;
3553 else if (c == FLOAT_REGS)
3554 reg_size = UNITS_PER_FP_WORD;
3556 else
3557 reg_size = UNITS_PER_WORD;
3559 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3561 machine_mode m2 = (machine_mode)m;
3562 int reg_size2 = reg_size;
3564 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3565 in VSX. */
3566 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3567 reg_size2 = UNITS_PER_FP_WORD;
3569 rs6000_class_max_nregs[m][c]
3570 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3574 if (TARGET_E500_DOUBLE)
3575 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
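/* A worked example of the ceiling division above, assuming the usual
   rs6000 sizes (UNITS_PER_FP_WORD == 8, V2DFmode == 16 bytes): a V2DFmode
   value in FLOAT_REGS needs (16 + 8 - 1) / 8 = 2 registers, while an
   8-byte DFmode value needs (8 + 8 - 1) / 8 = 1.  A minimal standalone
   sketch of the same idiom, using hypothetical names:  */
#if 0
#include <stdio.h>

/* Hypothetical mirror of the rs6000_class_max_nregs computation;
   mode_size and reg_size are in bytes.  */
static int
example_max_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}

int
main (void)
{
  printf ("V2DF in FPRs: %d\n", example_max_nregs (16, 8)); /* prints 2 */
  printf ("DF in FPRs:   %d\n", example_max_nregs (8, 8));  /* prints 1 */
  return 0;
}
#endif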
3577 /* Calculate which modes to automatically generate code to use the
3578 reciprocal divide and square root instructions. In the future, possibly
3579 automatically generate the instructions even if the user did not specify
3580 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3581 not accurate enough. */
3582 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3583 if (TARGET_FRES)
3584 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3585 if (TARGET_FRE)
3586 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3587 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3588 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3589 if (VECTOR_UNIT_VSX_P (V2DFmode))
3590 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3592 if (TARGET_FRSQRTES)
3593 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3594 if (TARGET_FRSQRTE)
3595 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3596 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3597 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3598 if (VECTOR_UNIT_VSX_P (V2DFmode))
3599 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3601 if (rs6000_recip_control)
3603 if (!flag_finite_math_only)
3604 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3605 if (flag_trapping_math)
3606 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3607 if (!flag_reciprocal_math)
3608 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3609 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3611 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3612 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3613 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3615 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3616 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3617 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3619 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3620 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3621 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3623 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3624 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3625 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3627 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3628 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3629 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3631 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3632 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3633 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3635 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3636 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3637 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3639 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3640 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3641 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
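/* For example, on a cpu with fres and frsqrtes, a hypothetical
   "-mrecip -ffast-math" command line satisfies all three checks above,
   leaving rs6000_recip_bits[SFmode] equal to (RS6000_RECIP_MASK_HAVE_RE
   | RS6000_RECIP_MASK_AUTO_RE | RS6000_RECIP_MASK_HAVE_RSQRTE
   | RS6000_RECIP_MASK_AUTO_RSQRTE), assuming plain -mrecip enables all of
   the RECIP_*_DIV and RECIP_*_RSQRT bits.  */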
3645 /* Update the addr mask bits in reg_addr to help secondary reload and the
3646 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
3647 use. */
3648 rs6000_setup_reg_addr_masks ();
3650 if (global_init_p || TARGET_DEBUG_TARGET)
3652 if (TARGET_DEBUG_REG)
3653 rs6000_debug_reg_global ();
3655 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3656 fprintf (stderr,
3657 "SImode variable mult cost = %d\n"
3658 "SImode constant mult cost = %d\n"
3659 "SImode short constant mult cost = %d\n"
3660 "DImode multipliciation cost = %d\n"
3661 "SImode division cost = %d\n"
3662 "DImode division cost = %d\n"
3663 "Simple fp operation cost = %d\n"
3664 "DFmode multiplication cost = %d\n"
3665 "SFmode division cost = %d\n"
3666 "DFmode division cost = %d\n"
3667 "cache line size = %d\n"
3668 "l1 cache size = %d\n"
3669 "l2 cache size = %d\n"
3670 "simultaneous prefetches = %d\n"
3671 "\n",
3672 rs6000_cost->mulsi,
3673 rs6000_cost->mulsi_const,
3674 rs6000_cost->mulsi_const9,
3675 rs6000_cost->muldi,
3676 rs6000_cost->divsi,
3677 rs6000_cost->divdi,
3678 rs6000_cost->fp,
3679 rs6000_cost->dmul,
3680 rs6000_cost->sdiv,
3681 rs6000_cost->ddiv,
3682 rs6000_cost->cache_line_size,
3683 rs6000_cost->l1_cache_size,
3684 rs6000_cost->l2_cache_size,
3685 rs6000_cost->simultaneous_prefetches);
3689 #if TARGET_MACHO
3690 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3692 static void
3693 darwin_rs6000_override_options (void)
3695 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3696 off. */
3697 rs6000_altivec_abi = 1;
3698 TARGET_ALTIVEC_VRSAVE = 1;
3699 rs6000_current_abi = ABI_DARWIN;
3701 if (DEFAULT_ABI == ABI_DARWIN
3702 && TARGET_64BIT)
3703 darwin_one_byte_bool = 1;
3705 if (TARGET_64BIT && ! TARGET_POWERPC64)
3707 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3708 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3710 if (flag_mkernel)
3712 rs6000_default_long_calls = 1;
3713 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3716 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3717 Altivec. */
3718 if (!flag_mkernel && !flag_apple_kext
3719 && TARGET_64BIT
3720 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3721 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3723 /* Unless the user (not the configurer) has explicitly overridden
3724 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3725 G4 unless targeting the kernel. */
3726 if (!flag_mkernel
3727 && !flag_apple_kext
3728 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3729 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3730 && ! global_options_set.x_rs6000_cpu_index)
3732 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3735 #endif
3737 /* If not otherwise specified by a target, make 'long double' equivalent to
3738 'double'. */
3740 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3741 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3742 #endif
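/* On a target where this 64-bit default is in effect, 'long double' and
   'double' share a representation.  A hypothetical compile-time check of
   that assumption:  */
#if 0
/* Fails to compile (negative array size) if the sizes ever diverge.  */
extern char example_long_double_is_double
  [sizeof (long double) == sizeof (double) ? 1 : -1];
#endif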
3744 /* Return the builtin mask of the various options used that could affect which
3745 builtins are enabled. In the past we used target_flags, but we've run out of
3746 bits, and some options like SPE and PAIRED are no longer in
3747 target_flags. */
3749 HOST_WIDE_INT
3750 rs6000_builtin_mask_calculate (void)
3752 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3753 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3754 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3755 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3756 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3757 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3758 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3759 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3760 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3761 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3762 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3763 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3764 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3765 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3766 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3767 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3768 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3769 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3770 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3771 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3772 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
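/* The result is a plain OR of RS6000_BTM_* bits, so a caller can test for
   a class of builtins with a bitwise AND.  A hypothetical usage sketch:  */
#if 0
HOST_WIDE_INT builtin_mask = rs6000_builtin_mask_calculate ();
if ((builtin_mask & RS6000_BTM_ALTIVEC) != 0)
  ; /* AltiVec builtins are available under the current options.  */
#endif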
3775 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3776 to clobber the XER[CA] bit because clobbering that bit without telling
3777 the compiler worked just fine with versions of GCC before GCC 5, and
3778 breaking a lot of older code in ways that are hard to track down is
3779 not such a great idea. */
3781 static rtx_insn *
3782 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3783 vec<const char *> &/*constraints*/,
3784 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3786 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3787 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3788 return NULL;
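/* The carry bit XER[CA] is written as a side effect of instructions such
   as addic and subfic, so a hypothetical asm like the one below modifies
   CA without declaring it; the implicit clobber added above keeps such
   code correct.  */
#if 0
long in = 5, out;
__asm__ ("addic %0,%1,1" : "=r" (out) : "r" (in)); /* also writes XER[CA] */
#endif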
3791 /* Override command line options. Mostly we process the processor type and
3792 sometimes adjust other TARGET_ options. */
3794 static bool
3795 rs6000_option_override_internal (bool global_init_p)
3797 bool ret = true;
3798 bool have_cpu = false;
3800 /* The default cpu requested at configure time, if any. */
3801 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3803 HOST_WIDE_INT set_masks;
3804 int cpu_index;
3805 int tune_index;
3806 struct cl_target_option *main_target_opt
3807 = ((global_init_p || target_option_default_node == NULL)
3808 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3810 /* Print defaults. */
3811 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3812 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3814 /* Remember the explicit arguments. */
3815 if (global_init_p)
3816 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3818 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3819 library functions, so warn about it. The flag may be useful for
3820 performance studies from time to time though, so don't disable it
3821 entirely. */
3822 if (global_options_set.x_rs6000_alignment_flags
3823 && rs6000_alignment_flags == MASK_ALIGN_POWER
3824 && DEFAULT_ABI == ABI_DARWIN
3825 && TARGET_64BIT)
3826 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3827 " it is incompatible with the installed C and C++ libraries");
3829 /* Numerous experiments show that IRA-based loop pressure
3830 calculation works better for RTL loop-invariant motion on targets
3831 with enough (>= 32) registers. It is an expensive optimization,
3832 so it is enabled only when optimizing for peak performance. */
3833 if (optimize >= 3 && global_init_p
3834 && !global_options_set.x_flag_ira_loop_pressure)
3835 flag_ira_loop_pressure = 1;
3837 /* Set the pointer size. */
3838 if (TARGET_64BIT)
3840 rs6000_pmode = (int)DImode;
3841 rs6000_pointer_size = 64;
3843 else
3845 rs6000_pmode = (int)SImode;
3846 rs6000_pointer_size = 32;
3849 /* Some OSs don't support saving the high part of 64-bit registers on context
3850 switch. Other OSs don't support saving Altivec registers. On those OSs,
3851 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3852 if the user wants either, the user must explicitly specify them and we
3853 won't interfere with the user's specification. */
3855 set_masks = POWERPC_MASKS;
3856 #ifdef OS_MISSING_POWERPC64
3857 if (OS_MISSING_POWERPC64)
3858 set_masks &= ~OPTION_MASK_POWERPC64;
3859 #endif
3860 #ifdef OS_MISSING_ALTIVEC
3861 if (OS_MISSING_ALTIVEC)
3862 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3863 #endif
3865 /* Don't override by the processor default if given explicitly. */
3866 set_masks &= ~rs6000_isa_flags_explicit;
3868 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3869 the cpu in a target attribute or pragma, but did not specify a tuning
3870 option, use the cpu for the tuning option rather than the option specified
3871 with -mtune on the command line. Process a '--with-cpu' configuration
3872 request as an implicit --cpu. */
3873 if (rs6000_cpu_index >= 0)
3875 cpu_index = rs6000_cpu_index;
3876 have_cpu = true;
3878 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3880 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3881 have_cpu = true;
3883 else if (implicit_cpu)
3885 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3886 have_cpu = true;
3888 else
3890 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3891 const char *default_cpu = ((!TARGET_POWERPC64)
3892 ? "powerpc"
3893 : ((BYTES_BIG_ENDIAN)
3894 ? "powerpc64"
3895 : "powerpc64le"));
3897 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3898 have_cpu = false;
3901 gcc_assert (cpu_index >= 0);
3903 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3904 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3905 with those from the cpu, except for options that were explicitly set. If
3906 we don't have a cpu, do not override the target bits set in
3907 TARGET_DEFAULT. */
3908 if (have_cpu)
3910 rs6000_isa_flags &= ~set_masks;
3911 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3912 & set_masks);
3914 else
3916 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3917 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3918 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3919 to using rs6000_isa_flags, we needed to do the initialization here.
3921 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3922 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3923 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3924 : processor_target_table[cpu_index].target_enable);
3925 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3928 if (rs6000_tune_index >= 0)
3929 tune_index = rs6000_tune_index;
3930 else if (have_cpu)
3931 rs6000_tune_index = tune_index = cpu_index;
3932 else
3934 size_t i;
3935 enum processor_type tune_proc
3936 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3938 tune_index = -1;
3939 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3940 if (processor_target_table[i].processor == tune_proc)
3942 rs6000_tune_index = tune_index = i;
3943 break;
3947 gcc_assert (tune_index >= 0);
3948 rs6000_cpu = processor_target_table[tune_index].processor;
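/* At this point cpu_index has selected the ISA flags and tune_index the
   scheduling/cost model.  For example, a hypothetical command line of
   "-mcpu=power8 -mtune=power9" takes its ISA bits from the power8 entry
   of processor_target_table, while rs6000_cpu, and hence the cost tables
   chosen below, reflects power9.  */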
3950 /* Pick defaults for SPE-related control flags. Do this early to make sure
3951 that the TARGET_ macros are representative ASAP. */
3953 int spe_capable_cpu =
3954 (rs6000_cpu == PROCESSOR_PPC8540
3955 || rs6000_cpu == PROCESSOR_PPC8548);
3957 if (!global_options_set.x_rs6000_spe_abi)
3958 rs6000_spe_abi = spe_capable_cpu;
3960 if (!global_options_set.x_rs6000_spe)
3961 rs6000_spe = spe_capable_cpu;
3963 if (!global_options_set.x_rs6000_float_gprs)
3964 rs6000_float_gprs =
3965 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3966 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3967 : 0);
3970 if (global_options_set.x_rs6000_spe_abi
3971 && rs6000_spe_abi
3972 && !TARGET_SPE_ABI)
3973 error ("not configured for SPE ABI");
3975 if (global_options_set.x_rs6000_spe
3976 && rs6000_spe
3977 && !TARGET_SPE)
3978 error ("not configured for SPE instruction set");
3980 if (main_target_opt != NULL
3981 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3982 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3983 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3984 error ("target attribute or pragma changes SPE ABI");
3986 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3987 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3988 || rs6000_cpu == PROCESSOR_PPCE5500)
3990 if (TARGET_ALTIVEC)
3991 error ("AltiVec not supported in this target");
3992 if (TARGET_SPE)
3993 error ("SPE not supported in this target");
3995 if (rs6000_cpu == PROCESSOR_PPCE6500)
3997 if (TARGET_SPE)
3998 error ("SPE not supported in this target");
4001 /* Disable Cell microcode if we are optimizing for the Cell
4002 and not optimizing for size. */
4003 if (rs6000_gen_cell_microcode == -1)
4004 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4005 && !optimize_size);
4007 /* If we are optimizing big endian systems for space and it's OK to
4008 use instructions that would be microcoded on the Cell, use the
4009 load/store multiple and string instructions. */
4010 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4011 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4012 | OPTION_MASK_STRING);
4014 /* Don't allow -mmultiple or -mstring on little endian systems
4015 unless the cpu is a 750, because the hardware doesn't support the
4016 instructions used in little endian mode and they cause an alignment
4017 trap. The 750 does not cause an alignment trap (except when the
4018 target is unaligned). */
4020 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4022 if (TARGET_MULTIPLE)
4024 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4025 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4026 warning (0, "-mmultiple is not supported on little endian systems");
4029 if (TARGET_STRING)
4031 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4033 warning (0, "-mstring is not supported on little endian systems");
4037 /* If little-endian, default to -mstrict-align on older processors.
4038 Testing for htm matches power8 and later. */
4039 if (!BYTES_BIG_ENDIAN
4040 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4041 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4043 /* -maltivec={le,be} implies -maltivec. */
4044 if (rs6000_altivec_element_order != 0)
4045 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4047 /* Disallow -maltivec=le in big endian mode for now. This is not
4048 known to be useful for anyone. */
4049 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4051 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4052 rs6000_altivec_element_order = 0;
4055 /* Add some warnings for VSX. */
4056 if (TARGET_VSX)
4058 const char *msg = NULL;
4059 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4060 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4062 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4063 msg = N_("-mvsx requires hardware floating point");
4064 else
4066 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4067 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4070 else if (TARGET_PAIRED_FLOAT)
4071 msg = N_("-mvsx and -mpaired are incompatible");
4072 else if (TARGET_AVOID_XFORM > 0)
4073 msg = N_("-mvsx needs indexed addressing");
4074 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4075 & OPTION_MASK_ALTIVEC))
4077 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4078 msg = N_("-mvsx and -mno-altivec are incompatible");
4079 else
4080 msg = N_("-mno-altivec disables vsx");
4083 if (msg)
4085 warning (0, msg);
4086 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4087 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4091 /* If hard-float/altivec/vsx were explicitly turned off, then don't allow
4092 the -mcpu setting to enable options that conflict. */
4093 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4094 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4095 | OPTION_MASK_ALTIVEC
4096 | OPTION_MASK_VSX)) != 0)
4097 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4098 | OPTION_MASK_DIRECT_MOVE)
4099 & ~rs6000_isa_flags_explicit);
4101 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4102 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4104 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4105 unless the user explicitly used -mno-<option> to disable the code. */
4106 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4107 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4108 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4109 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4110 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4111 else if (TARGET_VSX)
4112 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4113 else if (TARGET_POPCNTD)
4114 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4115 else if (TARGET_DFP)
4116 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4117 else if (TARGET_CMPB)
4118 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4119 else if (TARGET_FPRND)
4120 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4121 else if (TARGET_POPCNTB)
4122 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4123 else if (TARGET_ALTIVEC)
4124 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4126 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4128 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4129 error ("-mcrypto requires -maltivec");
4130 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4133 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4135 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4136 error ("-mdirect-move requires -mvsx");
4137 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4140 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4142 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4143 error ("-mpower8-vector requires -maltivec");
4144 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4147 if (TARGET_P8_VECTOR && !TARGET_VSX)
4149 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4150 error ("-mpower8-vector requires -mvsx");
4151 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4154 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4156 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4157 error ("-mvsx-timode requires -mvsx");
4158 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4161 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4163 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4164 error ("-mhard-dfp requires -mhard-float");
4165 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4168 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4169 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4170 set the individual option. */
4171 if (TARGET_UPPER_REGS > 0)
4173 if (TARGET_VSX
4174 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4176 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4177 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4179 if (TARGET_VSX
4180 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4182 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4183 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4185 if (TARGET_P8_VECTOR
4186 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4188 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4189 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4192 else if (TARGET_UPPER_REGS == 0)
4194 if (TARGET_VSX
4195 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4197 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4198 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4200 if (TARGET_VSX
4201 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4203 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4204 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4206 if (TARGET_P8_VECTOR
4207 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4209 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4210 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4214 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4216 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4217 error ("-mupper-regs-df requires -mvsx");
4218 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4221 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4223 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4224 error ("-mupper-regs-di requires -mvsx");
4225 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4228 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4230 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4231 error ("-mupper-regs-sf requires -mpower8-vector");
4232 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4235 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4236 silently turn off quad memory mode. */
4237 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4239 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4240 warning (0, N_("-mquad-memory requires 64-bit mode"));
4242 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4243 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4245 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4246 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4249 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4250 the words are reversed, but atomic operations can still be done by
4251 swapping the words. */
4252 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4254 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4255 warning (0, N_("-mquad-memory is not available in little endian mode"));
4257 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4260 /* Assume that if the user asked for normal quad memory instructions, they
4261 want the atomic versions as well, unless they explicitly told us not to
4262 use quad-word atomic instructions. */
4263 if (TARGET_QUAD_MEMORY
4264 && !TARGET_QUAD_MEMORY_ATOMIC
4265 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4266 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
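/* The idiom above, enabling one option when a related option is on unless
   the user explicitly decided otherwise, combines the flag word with the
   explicit-flags mask.  A minimal standalone model with hypothetical
   masks:  */
#if 0
#define EXAMPLE_MASK_BASE  0x1 /* stands in for OPTION_MASK_QUAD_MEMORY */
#define EXAMPLE_MASK_EXTRA 0x2 /* stands in for the _ATOMIC variant */
unsigned long flags = EXAMPLE_MASK_BASE, explicit_flags = 0;
if ((flags & EXAMPLE_MASK_BASE) != 0
    && (explicit_flags & EXAMPLE_MASK_EXTRA) == 0)
  flags |= EXAMPLE_MASK_EXTRA; /* implied, since the user did not object */
#endif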
4268 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4269 generating power8 instructions. */
4270 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4271 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4272 & OPTION_MASK_P8_FUSION);
4274 /* Setting additional fusion flags turns on base fusion. */
4275 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4277 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4279 if (TARGET_P8_FUSION_SIGN)
4280 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4282 if (TARGET_TOC_FUSION)
4283 error ("-mtoc-fusion requires -mpower8-fusion");
4285 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4287 else
4288 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4291 /* Power9 fusion is a superset of power8 fusion. */
4292 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4294 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4296 /* We prefer to not mention undocumented options in
4297 error messages. However, if users have managed to select
4298 power9-fusion without selecting power8-fusion, they
4299 already know about undocumented flags. */
4300 error ("-mpower9-fusion requires -mpower8-fusion");
4301 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4303 else
4304 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4307 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4308 generating power9 instructions. */
4309 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4310 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4311 & OPTION_MASK_P9_FUSION);
4313 /* Power8 does not fuse sign-extended loads with the addis. If we are
4314 optimizing at high levels for speed, convert a sign-extended load into a
4315 zero-extending load and an explicit sign extension. */
4316 if (TARGET_P8_FUSION
4317 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4318 && optimize_function_for_speed_p (cfun)
4319 && optimize >= 3)
4320 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
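/* Sketch of the intent: instead of a sign-extending load such as
   "lha rY,off@l(rX)" after the addis (which power8 cannot fuse), the
   compiler can emit the zero-extending "lhz", which power8 does fuse with
   addis, followed by an explicit "extsh" to recreate the sign extension.
   The actual instruction selection happens elsewhere; this is only the
   shape of the transformation.  */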
4322 /* TOC fusion requires 64-bit and medium/large code model. */
4323 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4325 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4326 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4327 warning (0, N_("-mtoc-fusion requires 64-bit"));
4330 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4332 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4333 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4334 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4337 /* Turn on -mtoc-fusion by default if p8-fusion and a 64-bit medium/large
4338 code model are in use. */
4339 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4340 && (TARGET_CMODEL != CMODEL_SMALL)
4341 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4342 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4344 /* ISA 3.0 vector instructions include ISA 2.07. */
4345 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4347 /* We prefer to not mention undocumented options in
4348 error messages. However, if users have managed to select
4349 power9-vector without selecting power8-vector, they
4350 already know about undocumented flags. */
4351 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4352 error ("-mpower9-vector requires -mpower8-vector");
4353 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4356 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4357 -mpower9-dform-vector. */
4358 if (TARGET_P9_DFORM_BOTH > 0)
4360 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4361 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4363 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4364 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4366 else if (TARGET_P9_DFORM_BOTH == 0)
4368 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4369 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4371 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4372 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4375 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4376 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4378 /* We prefer to not mention undocumented options in
4379 error messages. However, if users have managed to select
4380 power9-dform without selecting power9-vector, they
4381 already know about undocumented flags. */
4382 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4383 error ("-mpower9-dform requires -mpower9-vector");
4384 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4385 | OPTION_MASK_P9_DFORM_VECTOR);
4388 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4390 /* We prefer to not mention undocumented options in
4391 error messages. However, if users have managed to select
4392 power9-dform without selecting upper-regs-df, they
4393 already know about undocumented flags. */
4394 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4395 error ("-mpower9-dform requires -mupper-regs-df");
4396 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4399 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4401 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4402 error ("-mpower9-dform requires -mupper-regs-sf");
4403 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4406 /* Enable LRA by default. */
4407 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4408 rs6000_isa_flags |= OPTION_MASK_LRA;
4410 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4411 but do show up with -mno-lra. Given that -mlra will become the default once
4412 PR 69847 is fixed, turn off the options with problems by default if
4413 -mno-lra was used, and warn if the user explicitly asked for the option.
4415 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4416 Enable -mvsx-timode by default if LRA and VSX. */
4417 if (!TARGET_LRA)
4419 if (TARGET_VSX_TIMODE)
4421 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4422 warning (0, "-mvsx-timode might need -mlra");
4424 else
4425 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4429 else
4431 if (TARGET_VSX && !TARGET_VSX_TIMODE
4432 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4433 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4436 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4437 support. If we only have ISA 2.06 support, and the user did not specify
4438 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4439 but we don't enable the full vectorization support. */
4440 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4441 TARGET_ALLOW_MOVMISALIGN = 1;
4443 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4445 if (TARGET_ALLOW_MOVMISALIGN > 0
4446 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4447 error ("-mallow-movmisalign requires -mvsx");
4449 TARGET_ALLOW_MOVMISALIGN = 0;
4452 /* Determine when unaligned vector accesses are permitted, and when
4453 they are preferred over masked Altivec loads. Note that if
4454 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4455 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4456 not true. */
4457 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4459 if (!TARGET_VSX)
4461 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4462 error ("-mefficient-unaligned-vsx requires -mvsx");
4464 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4467 else if (!TARGET_ALLOW_MOVMISALIGN)
4469 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4470 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4472 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4476 /* Check whether we should allow small integers into VSX registers. We
4477 require direct move to prevent the register allocator from having to move
4478 variables through memory. SImode can be used on ISA 2.07, while HImode
4479 and QImode require ISA 3.0. */
4480 if (TARGET_VSX_SMALL_INTEGER
4481 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4483 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4484 error ("-mvsx-small-integer requires -mpower8-vector, "
4485 "-mupper-regs-di, and -mdirect-move");
4487 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4490 /* Set long double size before the IEEE 128-bit tests. */
4491 if (!global_options_set.x_rs6000_long_double_type_size)
4493 if (main_target_opt != NULL
4494 && (main_target_opt->x_rs6000_long_double_type_size
4495 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4496 error ("target attribute or pragma changes long double size");
4497 else
4498 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4501 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4502 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4503 pick up this default. */
4504 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4505 if (!global_options_set.x_rs6000_ieeequad)
4506 rs6000_ieeequad = 1;
4507 #endif
4509 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4510 systems, but don't enable the __float128 keyword. */
4511 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4512 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4513 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4514 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4516 /* IEEE 128-bit floating point requires VSX support. */
4517 if (!TARGET_VSX)
4519 if (TARGET_FLOAT128_KEYWORD)
4521 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4522 error ("-mfloat128 requires VSX support");
4524 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4525 | OPTION_MASK_FLOAT128_KEYWORD
4526 | OPTION_MASK_FLOAT128_HW);
4529 else if (TARGET_FLOAT128_TYPE)
4531 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4532 error ("-mfloat128-type requires VSX support");
4534 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4535 | OPTION_MASK_FLOAT128_KEYWORD
4536 | OPTION_MASK_FLOAT128_HW);
4540 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4541 128-bit floating point support to be enabled. */
4542 if (!TARGET_FLOAT128_TYPE)
4544 if (TARGET_FLOAT128_KEYWORD)
4546 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4548 error ("-mfloat128 requires -mfloat128-type");
4549 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4550 | OPTION_MASK_FLOAT128_KEYWORD
4551 | OPTION_MASK_FLOAT128_HW);
4553 else
4554 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4557 if (TARGET_FLOAT128_HW)
4559 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4561 error ("-mfloat128-hardware requires -mfloat128-type");
4562 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4564 else
4565 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4566 | OPTION_MASK_FLOAT128_KEYWORD
4567 | OPTION_MASK_FLOAT128_HW);
4571 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4572 -mfloat128-hardware by default. However, don't enable the __float128
4573 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4574 -mfloat128 option as well if it was not already set. */
4575 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4576 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4577 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4578 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4580 if (TARGET_FLOAT128_HW
4581 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4583 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4584 error ("-mfloat128-hardware requires full ISA 3.0 support");
4586 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4589 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4590 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4591 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4592 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
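/* The net effect of the float128 checks above is a dependency chain:
   -mfloat128-hardware requires -mfloat128-type (and full ISA 3.0
   support), -mfloat128 requires -mfloat128-type, and -mfloat128-type
   requires VSX.  As a sketch, a hypothetical "-mcpu=power9 -mfloat128"
   compile on a target that enables -mfloat128-type by default would also
   get -mfloat128-hardware implicitly, since ISA 3.0 supplies the
   ISA_3_0_MASKS_IEEE bits tested above.  */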
4594 /* Print the options after updating the defaults. */
4595 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4596 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4598 /* E500mc does "better" if we inline more aggressively. Respect the
4599 user's opinion, though. */
4600 if (rs6000_block_move_inline_limit == 0
4601 && (rs6000_cpu == PROCESSOR_PPCE500MC
4602 || rs6000_cpu == PROCESSOR_PPCE500MC64
4603 || rs6000_cpu == PROCESSOR_PPCE5500
4604 || rs6000_cpu == PROCESSOR_PPCE6500))
4605 rs6000_block_move_inline_limit = 128;
4607 /* store_one_arg depends on expand_block_move to handle at least the
4608 size of reg_parm_stack_space. */
4609 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4610 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4612 if (global_init_p)
4614 /* If the appropriate debug option is enabled, replace the target hooks
4615 with debug versions that call the real version and then print
4616 debugging information. */
4617 if (TARGET_DEBUG_COST)
4619 targetm.rtx_costs = rs6000_debug_rtx_costs;
4620 targetm.address_cost = rs6000_debug_address_cost;
4621 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4624 if (TARGET_DEBUG_ADDR)
4626 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4627 targetm.legitimize_address = rs6000_debug_legitimize_address;
4628 rs6000_secondary_reload_class_ptr
4629 = rs6000_debug_secondary_reload_class;
4630 rs6000_secondary_memory_needed_ptr
4631 = rs6000_debug_secondary_memory_needed;
4632 rs6000_cannot_change_mode_class_ptr
4633 = rs6000_debug_cannot_change_mode_class;
4634 rs6000_preferred_reload_class_ptr
4635 = rs6000_debug_preferred_reload_class;
4636 rs6000_legitimize_reload_address_ptr
4637 = rs6000_debug_legitimize_reload_address;
4638 rs6000_mode_dependent_address_ptr
4639 = rs6000_debug_mode_dependent_address;
4642 if (rs6000_veclibabi_name)
4644 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4645 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4646 else
4648 error ("unknown vectorization library ABI type (%s) for "
4649 "-mveclibabi= switch", rs6000_veclibabi_name);
4650 ret = false;
4655 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4656 target attribute or pragma, which automatically enables both options,
4657 unless the altivec ABI was set. This is set by default for 64-bit, but
4658 not for 32-bit. */
4659 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4660 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4661 | OPTION_MASK_FLOAT128_TYPE
4662 | OPTION_MASK_FLOAT128_KEYWORD)
4663 & ~rs6000_isa_flags_explicit);
4665 /* Enable Altivec ABI for AIX -maltivec. */
4666 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4668 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4669 error ("target attribute or pragma changes AltiVec ABI");
4670 else
4671 rs6000_altivec_abi = 1;
4674 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4675 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4676 be explicitly overridden in either case. */
4677 if (TARGET_ELF)
4679 if (!global_options_set.x_rs6000_altivec_abi
4680 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4682 if (main_target_opt != NULL &&
4683 !main_target_opt->x_rs6000_altivec_abi)
4684 error ("target attribute or pragma changes AltiVec ABI");
4685 else
4686 rs6000_altivec_abi = 1;
4690 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4691 So far, the only darwin64 targets are also MACH-O. */
4692 if (TARGET_MACHO
4693 && DEFAULT_ABI == ABI_DARWIN
4694 && TARGET_64BIT)
4696 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4697 error ("target attribute or pragma changes darwin64 ABI");
4698 else
4700 rs6000_darwin64_abi = 1;
4701 /* Default to natural alignment, for better performance. */
4702 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4706 /* Place FP constants in the constant pool instead of the TOC
4707 if section anchors are enabled. */
4708 if (flag_section_anchors
4709 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4710 TARGET_NO_FP_IN_TOC = 1;
4712 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4713 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4715 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4716 SUBTARGET_OVERRIDE_OPTIONS;
4717 #endif
4718 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4719 SUBSUBTARGET_OVERRIDE_OPTIONS;
4720 #endif
4721 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4722 SUB3TARGET_OVERRIDE_OPTIONS;
4723 #endif
4725 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4726 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4728 /* For the E500 family of cores, reset the single/double FP flags to let us
4729 check that they remain constant across attributes or pragmas. Also,
4730 clear a possible request for string instructions, which are not supported
4731 and which we might have silently enabled above for -Os.
4733 For other families, clear ISEL in case it was set implicitly.
4736 switch (rs6000_cpu)
4738 case PROCESSOR_PPC8540:
4739 case PROCESSOR_PPC8548:
4740 case PROCESSOR_PPCE500MC:
4741 case PROCESSOR_PPCE500MC64:
4742 case PROCESSOR_PPCE5500:
4743 case PROCESSOR_PPCE6500:
4745 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4746 rs6000_double_float = TARGET_E500_DOUBLE;
4748 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4750 break;
4752 default:
4754 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4755 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4757 break;
4760 if (main_target_opt)
4762 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4763 error ("target attribute or pragma changes single precision floating "
4764 "point");
4765 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4766 error ("target attribute or pragma changes double precision floating "
4767 "point");
4770 /* Detect invalid option combinations with E500. */
4771 CHECK_E500_OPTIONS;
4773 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4774 && rs6000_cpu != PROCESSOR_POWER5
4775 && rs6000_cpu != PROCESSOR_POWER6
4776 && rs6000_cpu != PROCESSOR_POWER7
4777 && rs6000_cpu != PROCESSOR_POWER8
4778 && rs6000_cpu != PROCESSOR_POWER9
4779 && rs6000_cpu != PROCESSOR_PPCA2
4780 && rs6000_cpu != PROCESSOR_CELL
4781 && rs6000_cpu != PROCESSOR_PPC476);
4782 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4783 || rs6000_cpu == PROCESSOR_POWER5
4784 || rs6000_cpu == PROCESSOR_POWER7
4785 || rs6000_cpu == PROCESSOR_POWER8);
4786 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4787 || rs6000_cpu == PROCESSOR_POWER5
4788 || rs6000_cpu == PROCESSOR_POWER6
4789 || rs6000_cpu == PROCESSOR_POWER7
4790 || rs6000_cpu == PROCESSOR_POWER8
4791 || rs6000_cpu == PROCESSOR_POWER9
4792 || rs6000_cpu == PROCESSOR_PPCE500MC
4793 || rs6000_cpu == PROCESSOR_PPCE500MC64
4794 || rs6000_cpu == PROCESSOR_PPCE5500
4795 || rs6000_cpu == PROCESSOR_PPCE6500);
4797 /* Allow debug switches to override the above settings. These are set to -1
4798 in rs6000.opt to indicate the user hasn't directly set the switch. */
4799 if (TARGET_ALWAYS_HINT >= 0)
4800 rs6000_always_hint = TARGET_ALWAYS_HINT;
4802 if (TARGET_SCHED_GROUPS >= 0)
4803 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4805 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4806 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4808 rs6000_sched_restricted_insns_priority
4809 = (rs6000_sched_groups ? 1 : 0);
4811 /* Handle -msched-costly-dep option. */
4812 rs6000_sched_costly_dep
4813 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4815 if (rs6000_sched_costly_dep_str)
4817 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4818 rs6000_sched_costly_dep = no_dep_costly;
4819 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4820 rs6000_sched_costly_dep = all_deps_costly;
4821 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4822 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4823 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4824 rs6000_sched_costly_dep = store_to_load_dep_costly;
4825 else
4826 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4827 atoi (rs6000_sched_costly_dep_str));
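/* Usage sketch: "-msched-costly-dep=store_to_load" selects
   store_to_load_dep_costly above, while a hypothetical numeric argument
   such as "-msched-costly-dep=20" falls through to the atoi case and is
   treated as a cost threshold by the scheduler hooks later in this
   file.  */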
4830 /* Handle -minsert-sched-nops option. */
4831 rs6000_sched_insert_nops
4832 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4834 if (rs6000_sched_insert_nops_str)
4836 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4837 rs6000_sched_insert_nops = sched_finish_none;
4838 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4839 rs6000_sched_insert_nops = sched_finish_pad_groups;
4840 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4841 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4842 else
4843 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4844 atoi (rs6000_sched_insert_nops_str));
4847 if (global_init_p)
4849 #ifdef TARGET_REGNAMES
4850 /* If the user desires alternate register names, copy in the
4851 alternate names now. */
4852 if (TARGET_REGNAMES)
4853 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4854 #endif
4856 /* Set aix_struct_return last, after the ABI is determined.
4857 If -maix-struct-return or -msvr4-struct-return was explicitly
4858 used, don't override with the ABI default. */
4859 if (!global_options_set.x_aix_struct_return)
4860 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4862 #if 0
4863 /* IBM XL compiler defaults to unsigned bitfields. */
4864 if (TARGET_XL_COMPAT)
4865 flag_signed_bitfields = 0;
4866 #endif
4868 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4869 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4871 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4873 /* We can only guarantee the availability of DI pseudo-ops when
4874 assembling for 64-bit targets. */
4875 if (!TARGET_64BIT)
4877 targetm.asm_out.aligned_op.di = NULL;
4878 targetm.asm_out.unaligned_op.di = NULL;
4882 /* Set branch target alignment, if not optimizing for size. */
4883 if (!optimize_size)
4885 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4886 aligned 8-byte to avoid misprediction by the branch predictor. */
4887 if (rs6000_cpu == PROCESSOR_TITAN
4888 || rs6000_cpu == PROCESSOR_CELL)
4890 if (align_functions <= 0)
4891 align_functions = 8;
4892 if (align_jumps <= 0)
4893 align_jumps = 8;
4894 if (align_loops <= 0)
4895 align_loops = 8;
4897 if (rs6000_align_branch_targets)
4899 if (align_functions <= 0)
4900 align_functions = 16;
4901 if (align_jumps <= 0)
4902 align_jumps = 16;
4903 if (align_loops <= 0)
4905 can_override_loop_align = 1;
4906 align_loops = 16;
4909 if (align_jumps_max_skip <= 0)
4910 align_jumps_max_skip = 15;
4911 if (align_loops_max_skip <= 0)
4912 align_loops_max_skip = 15;
4915 /* Arrange to save and restore machine status around nested functions. */
4916 init_machine_status = rs6000_init_machine_status;
4918 /* We should always be splitting complex arguments, but we can't break
4919 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4920 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4921 targetm.calls.split_complex_arg = NULL;
4923 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4924 if (DEFAULT_ABI == ABI_AIX)
4925 targetm.calls.custom_function_descriptors = 0;
4928 /* Initialize rs6000_cost with the appropriate target costs. */
4929 if (optimize_size)
4930 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4931 else
4932 switch (rs6000_cpu)
4934 case PROCESSOR_RS64A:
4935 rs6000_cost = &rs64a_cost;
4936 break;
4938 case PROCESSOR_MPCCORE:
4939 rs6000_cost = &mpccore_cost;
4940 break;
4942 case PROCESSOR_PPC403:
4943 rs6000_cost = &ppc403_cost;
4944 break;
4946 case PROCESSOR_PPC405:
4947 rs6000_cost = &ppc405_cost;
4948 break;
4950 case PROCESSOR_PPC440:
4951 rs6000_cost = &ppc440_cost;
4952 break;
4954 case PROCESSOR_PPC476:
4955 rs6000_cost = &ppc476_cost;
4956 break;
4958 case PROCESSOR_PPC601:
4959 rs6000_cost = &ppc601_cost;
4960 break;
4962 case PROCESSOR_PPC603:
4963 rs6000_cost = &ppc603_cost;
4964 break;
4966 case PROCESSOR_PPC604:
4967 rs6000_cost = &ppc604_cost;
4968 break;
4970 case PROCESSOR_PPC604e:
4971 rs6000_cost = &ppc604e_cost;
4972 break;
4974 case PROCESSOR_PPC620:
4975 rs6000_cost = &ppc620_cost;
4976 break;
4978 case PROCESSOR_PPC630:
4979 rs6000_cost = &ppc630_cost;
4980 break;
4982 case PROCESSOR_CELL:
4983 rs6000_cost = &ppccell_cost;
4984 break;
4986 case PROCESSOR_PPC750:
4987 case PROCESSOR_PPC7400:
4988 rs6000_cost = &ppc750_cost;
4989 break;
4991 case PROCESSOR_PPC7450:
4992 rs6000_cost = &ppc7450_cost;
4993 break;
4995 case PROCESSOR_PPC8540:
4996 case PROCESSOR_PPC8548:
4997 rs6000_cost = &ppc8540_cost;
4998 break;
5000 case PROCESSOR_PPCE300C2:
5001 case PROCESSOR_PPCE300C3:
5002 rs6000_cost = &ppce300c2c3_cost;
5003 break;
5005 case PROCESSOR_PPCE500MC:
5006 rs6000_cost = &ppce500mc_cost;
5007 break;
5009 case PROCESSOR_PPCE500MC64:
5010 rs6000_cost = &ppce500mc64_cost;
5011 break;
5013 case PROCESSOR_PPCE5500:
5014 rs6000_cost = &ppce5500_cost;
5015 break;
5017 case PROCESSOR_PPCE6500:
5018 rs6000_cost = &ppce6500_cost;
5019 break;
5021 case PROCESSOR_TITAN:
5022 rs6000_cost = &titan_cost;
5023 break;
5025 case PROCESSOR_POWER4:
5026 case PROCESSOR_POWER5:
5027 rs6000_cost = &power4_cost;
5028 break;
5030 case PROCESSOR_POWER6:
5031 rs6000_cost = &power6_cost;
5032 break;
5034 case PROCESSOR_POWER7:
5035 rs6000_cost = &power7_cost;
5036 break;
5038 case PROCESSOR_POWER8:
5039 rs6000_cost = &power8_cost;
5040 break;
5042 case PROCESSOR_POWER9:
5043 rs6000_cost = &power9_cost;
5044 break;
5046 case PROCESSOR_PPCA2:
5047 rs6000_cost = &ppca2_cost;
5048 break;
5050 default:
5051 gcc_unreachable ();
5054 if (global_init_p)
5056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5057 rs6000_cost->simultaneous_prefetches,
5058 global_options.x_param_values,
5059 global_options_set.x_param_values);
5060 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5061 global_options.x_param_values,
5062 global_options_set.x_param_values);
5063 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5064 rs6000_cost->cache_line_size,
5065 global_options.x_param_values,
5066 global_options_set.x_param_values);
5067 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5068 global_options.x_param_values,
5069 global_options_set.x_param_values);
5071 /* Increase loop peeling limits based on performance analysis. */
5072 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5073 global_options.x_param_values,
5074 global_options_set.x_param_values);
5075 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5076 global_options.x_param_values,
5077 global_options_set.x_param_values);
5079 /* If using typedef char *va_list, signal that
5080 __builtin_va_start (&ap, 0) can be optimized to
5081 ap = __builtin_next_arg (0). */
5082 if (DEFAULT_ABI != ABI_V4)
5083 targetm.expand_builtin_va_start = NULL;
5086 /* Set up single/double float flags.
5087 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5088 then set both flags. */
5089 if (TARGET_HARD_FLOAT && TARGET_FPRS
5090 && rs6000_single_float == 0 && rs6000_double_float == 0)
5091 rs6000_single_float = rs6000_double_float = 1;
5093 /* If not explicitly specified via option, decide whether to generate indexed
5094 load/store instructions. */
5095 if (TARGET_AVOID_XFORM == -1)
5096 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5097 DERAT mispredict penalty.  However, the LVE and STVE altivec instructions
5098 need indexed accesses and the type used is the scalar type of the element
5099 being loaded or stored. */
5100 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5101 && !TARGET_ALTIVEC);
5103 /* Set the -mrecip options. */
5104 if (rs6000_recip_name)
5106 char *p = ASTRDUP (rs6000_recip_name);
5107 char *q;
5108 unsigned int mask, i;
5109 bool invert;
5111 while ((q = strtok (p, ",")) != NULL)
5113 p = NULL;
5114 if (*q == '!')
5116 invert = true;
5117 q++;
5119 else
5120 invert = false;
5122 if (!strcmp (q, "default"))
5123 mask = ((TARGET_RECIP_PRECISION)
5124 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5125 else
5127 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5128 if (!strcmp (q, recip_options[i].string))
5130 mask = recip_options[i].mask;
5131 break;
5134 if (i == ARRAY_SIZE (recip_options))
5136 error ("unknown option for -mrecip=%s", q);
5137 invert = false;
5138 mask = 0;
5139 ret = false;
5143 if (invert)
5144 rs6000_recip_control &= ~mask;
5145 else
5146 rs6000_recip_control |= mask;
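/* Illustrative sketch, not part of GCC: the loop above relies on the
   classic strtok idiom of passing the buffer on the first call and
   NULL on subsequent calls, so scanning continues in the same string.
   With the hypothetical argument -mrecip=!rsqrtd,div it iterates
   twice:

     char buf[] = "!rsqrtd,div";
     char *p = buf, *q;
     while ((q = strtok (p, ",")) != NULL)
       {
         p = NULL;                   // keep scanning the same buffer
         bool invert = (*q == '!');  // pass 1: true,  q == "rsqrtd"
         if (invert)                 // pass 2: false, q == "div"
           q++;
       }  */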
5150 /* Set the builtin mask from the various options that could affect which
5151 builtins are enabled.  In the past we used target_flags, but we've run out
5152 of bits, and some options like SPE and PAIRED are no longer in
5153 target_flags. */
5154 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5155 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5156 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5157 rs6000_builtin_mask);
5159 /* Initialize all of the registers. */
5160 rs6000_init_hard_regno_mode_ok (global_init_p);
5162 /* Save the initial options in case the user uses function-specific options. */
5163 if (global_init_p)
5164 target_option_default_node = target_option_current_node
5165 = build_target_option_node (&global_options);
5167 /* If not explicitly specified via option, decide whether to generate the
5168 extra blrs required to preserve the link stack on some cpus (e.g., the 476). */
5169 if (TARGET_LINK_STACK == -1)
5170 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5172 return ret;
5175 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5176 define the target cpu type. */
5178 static void
5179 rs6000_option_override (void)
5181 (void) rs6000_option_override_internal (true);
5183 /* Register machine-specific passes. This needs to be done at start-up.
5184 It's convenient to do it here (like i386 does). */
5185 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5187 struct register_pass_info analyze_swaps_info
5188 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5190 register_pass (&analyze_swaps_info);
5194 /* Implement targetm.vectorize.builtin_mask_for_load. */
5195 static tree
5196 rs6000_builtin_mask_for_load (void)
5198 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5199 if ((TARGET_ALTIVEC && !TARGET_VSX)
5200 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5201 return altivec_builtin_mask_for_load;
5202 else
5203 return 0;
5206 /* Implement LOOP_ALIGN. */
5208 rs6000_loop_align (rtx label)
5210 basic_block bb;
5211 int ninsns;
5213 /* Don't override loop alignment if -falign-loops was specified. */
5214 if (!can_override_loop_align)
5215 return align_loops_log;
5217 bb = BLOCK_FOR_INSN (label);
5218 ninsns = num_loop_insns(bb->loop_father);
5220 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return the default. */
5221 if (ninsns > 4 && ninsns <= 8
5222 && (rs6000_cpu == PROCESSOR_POWER4
5223 || rs6000_cpu == PROCESSOR_POWER5
5224 || rs6000_cpu == PROCESSOR_POWER6
5225 || rs6000_cpu == PROCESSOR_POWER7
5226 || rs6000_cpu == PROCESSOR_POWER8
5227 || rs6000_cpu == PROCESSOR_POWER9))
5228 return 5;
5229 else
5230 return align_loops_log;
5233 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5234 static int
5235 rs6000_loop_align_max_skip (rtx_insn *label)
5237 return (1 << rs6000_loop_align (label)) - 1;
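/* Worked example, illustrative only: when tuning for POWER8, a loop
   body of 6 insns makes rs6000_loop_align return 5, i.e. a
   2**5 == 32-byte alignment so the loop fits in one icache sector,
   and rs6000_loop_align_max_skip then allows up to
   (1 << 5) - 1 == 31 bytes of padding.  Loops outside the 5..8 insn
   window fall back to align_loops_log.  */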
5240 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5241 after applying N iterations.  This routine does not determine
5242 how many iterations are required to reach the desired alignment. */
5244 static bool
5245 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5247 if (is_packed)
5248 return false;
5250 if (TARGET_32BIT)
5252 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5253 return true;
5255 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5256 return true;
5258 return false;
5260 else
5262 if (TARGET_MACHO)
5263 return false;
5265 /* Assume that all other types are naturally aligned.  CHECKME! */
5266 return true;
5270 /* Return true if the vector misalignment factor is supported by the
5271 target. */
5272 static bool
5273 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5274 const_tree type,
5275 int misalignment,
5276 bool is_packed)
5278 if (TARGET_VSX)
5280 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5281 return true;
5283 /* Return if movmisalign pattern is not supported for this mode. */
5284 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5285 return false;
5287 if (misalignment == -1)
5289 /* Misalignment factor is unknown at compile time but we know
5290 it's word aligned. */
5291 if (rs6000_vector_alignment_reachable (type, is_packed))
5293 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5295 if (element_size == 64 || element_size == 32)
5296 return true;
5299 return false;
5302 /* VSX supports word-aligned vectors. */
5303 if (misalignment % 4 == 0)
5304 return true;
5306 return false;
5309 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5310 static int
5311 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5312 tree vectype, int misalign)
5314 unsigned elements;
5315 tree elem_type;
5317 switch (type_of_cost)
5319 case scalar_stmt:
5320 case scalar_load:
5321 case scalar_store:
5322 case vector_stmt:
5323 case vector_load:
5324 case vector_store:
5325 case vec_to_scalar:
5326 case scalar_to_vec:
5327 case cond_branch_not_taken:
5328 return 1;
5330 case vec_perm:
5331 if (TARGET_VSX)
5332 return 3;
5333 else
5334 return 1;
5336 case vec_promote_demote:
5337 if (TARGET_VSX)
5338 return 4;
5339 else
5340 return 1;
5342 case cond_branch_taken:
5343 return 3;
5345 case unaligned_load:
5346 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5347 return 1;
5349 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5351 elements = TYPE_VECTOR_SUBPARTS (vectype);
5352 if (elements == 2)
5353 /* Double word aligned. */
5354 return 2;
5356 if (elements == 4)
5358 switch (misalign)
5360 case 8:
5361 /* Double word aligned. */
5362 return 2;
5364 case -1:
5365 /* Unknown misalignment. */
5366 case 4:
5367 case 12:
5368 /* Word aligned. */
5369 return 22;
5371 default:
5372 gcc_unreachable ();
5377 if (TARGET_ALTIVEC)
5378 /* Misaligned loads are not supported. */
5379 gcc_unreachable ();
5381 return 2;
5383 case unaligned_store:
5384 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5385 return 1;
5387 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5389 elements = TYPE_VECTOR_SUBPARTS (vectype);
5390 if (elements == 2)
5391 /* Double word aligned. */
5392 return 2;
5394 if (elements == 4)
5396 switch (misalign)
5398 case 8:
5399 /* Double word aligned. */
5400 return 2;
5402 case -1:
5403 /* Unknown misalignment. */
5404 case 4:
5405 case 12:
5406 /* Word aligned. */
5407 return 23;
5409 default:
5410 gcc_unreachable ();
5415 if (TARGET_ALTIVEC)
5416 /* Misaligned stores are not supported. */
5417 gcc_unreachable ();
5419 return 2;
5421 case vec_construct:
5422 /* This is a rough approximation assuming non-constant elements
5423 constructed into a vector via element insertion. FIXME:
5424 vec_construct is not granular enough for uniformly good
5425 decisions. If the initialization is a splat, this is
5426 cheaper than we estimate. Improve this someday. */
5427 elem_type = TREE_TYPE (vectype);
5428 /* 32-bit float values loaded into registers are stored as double
5429 precision, so we need 2 permutes, 2 converts, and 1 merge
5430 to construct a vector of short floats from them. */
5431 if (SCALAR_FLOAT_TYPE_P (elem_type)
5432 && TYPE_PRECISION (elem_type) == 32)
5433 return 5;
5434 else
5435 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5437 default:
5438 gcc_unreachable ();
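/* Worked examples, illustrative only, assuming VSX with
   TARGET_ALLOW_MOVMISALIGN but without TARGET_EFFICIENT_UNALIGNED_VSX:

     unaligned_load, V4SF, misalign 8 -> 2    // double-word aligned
     unaligned_load, V4SF, misalign 4 -> 22   // only word aligned
     vec_construct,  V4SI             -> max (2, 4 - 1) == 3

   With TARGET_EFFICIENT_UNALIGNED_VSX the unaligned cases all
   cost 1.  */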
5442 /* Implement targetm.vectorize.preferred_simd_mode. */
5444 static machine_mode
5445 rs6000_preferred_simd_mode (machine_mode mode)
5447 if (TARGET_VSX)
5448 switch (mode)
5450 case DFmode:
5451 return V2DFmode;
5452 default:;
5454 if (TARGET_ALTIVEC || TARGET_VSX)
5455 switch (mode)
5457 case SFmode:
5458 return V4SFmode;
5459 case TImode:
5460 return V1TImode;
5461 case DImode:
5462 return V2DImode;
5463 case SImode:
5464 return V4SImode;
5465 case HImode:
5466 return V8HImode;
5467 case QImode:
5468 return V16QImode;
5469 default:;
5471 if (TARGET_SPE)
5472 switch (mode)
5474 case SFmode:
5475 return V2SFmode;
5476 case SImode:
5477 return V2SImode;
5478 default:;
5480 if (TARGET_PAIRED_FLOAT
5481 && mode == SFmode)
5482 return V2SFmode;
5483 return word_mode;
5486 typedef struct _rs6000_cost_data
5488 struct loop *loop_info;
5489 unsigned cost[3];
5490 } rs6000_cost_data;
5492 /* Test for likely overcommitment of vector hardware resources. If a
5493 loop iteration is relatively large, and too large a percentage of
5494 instructions in the loop are vectorized, the cost model may not
5495 adequately reflect delays from unavailable vector resources.
5496 Penalize the loop body cost for this case. */
5498 static void
5499 rs6000_density_test (rs6000_cost_data *data)
5501 const int DENSITY_PCT_THRESHOLD = 85;
5502 const int DENSITY_SIZE_THRESHOLD = 70;
5503 const int DENSITY_PENALTY = 10;
5504 struct loop *loop = data->loop_info;
5505 basic_block *bbs = get_loop_body (loop);
5506 int nbbs = loop->num_nodes;
5507 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5508 int i, density_pct;
5510 for (i = 0; i < nbbs; i++)
5512 basic_block bb = bbs[i];
5513 gimple_stmt_iterator gsi;
5515 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5517 gimple *stmt = gsi_stmt (gsi);
5518 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5520 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5521 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5522 not_vec_cost++;
5526 free (bbs);
5527 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5529 if (density_pct > DENSITY_PCT_THRESHOLD
5530 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5532 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_NOTE, vect_location,
5535 "density %d%%, cost %d exceeds threshold, penalizing "
5536 "loop body cost by %d%%", density_pct,
5537 vec_cost + not_vec_cost, DENSITY_PENALTY);
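/* Worked example, illustrative only: with vec_cost == 80 and
   not_vec_cost == 10, density_pct == (80 * 100) / 90 == 88, which
   exceeds DENSITY_PCT_THRESHOLD (85), and the loop size 90 exceeds
   DENSITY_SIZE_THRESHOLD (70), so the body cost is inflated to
   80 * (100 + 10) / 100 == 88.  */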
5541 /* Implement targetm.vectorize.init_cost. */
5543 static void *
5544 rs6000_init_cost (struct loop *loop_info)
5546 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5547 data->loop_info = loop_info;
5548 data->cost[vect_prologue] = 0;
5549 data->cost[vect_body] = 0;
5550 data->cost[vect_epilogue] = 0;
5551 return data;
5554 /* Implement targetm.vectorize.add_stmt_cost. */
5556 static unsigned
5557 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5558 struct _stmt_vec_info *stmt_info, int misalign,
5559 enum vect_cost_model_location where)
5561 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5562 unsigned retval = 0;
5564 if (flag_vect_cost_model)
5566 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5567 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5568 misalign);
5569 /* Statements in an inner loop relative to the loop being
5570 vectorized are weighted more heavily. The value here is
5571 arbitrary and could potentially be improved with analysis. */
5572 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5573 count *= 50; /* FIXME. */
5575 retval = (unsigned) (count * stmt_cost);
5576 cost_data->cost[where] += retval;
5579 return retval;
5582 /* Implement targetm.vectorize.finish_cost. */
5584 static void
5585 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5586 unsigned *body_cost, unsigned *epilogue_cost)
5588 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5590 if (cost_data->loop_info)
5591 rs6000_density_test (cost_data);
5593 *prologue_cost = cost_data->cost[vect_prologue];
5594 *body_cost = cost_data->cost[vect_body];
5595 *epilogue_cost = cost_data->cost[vect_epilogue];
5598 /* Implement targetm.vectorize.destroy_cost_data. */
5600 static void
5601 rs6000_destroy_cost_data (void *data)
5603 free (data);
5606 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5607 library with vectorized intrinsics. */
5609 static tree
5610 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5611 tree type_in)
5613 char name[32];
5614 const char *suffix = NULL;
5615 tree fntype, new_fndecl, bdecl = NULL_TREE;
5616 int n_args = 1;
5617 const char *bname;
5618 machine_mode el_mode, in_mode;
5619 int n, in_n;
5621 /* Libmass is suitable for unsafe math only as it does not correctly support
5622 parts of IEEE with the required precision such as denormals. Only support
5623 it if we have VSX to use the simd d2 or f4 functions.
5624 XXX: Add variable length support. */
5625 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5626 return NULL_TREE;
5628 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5629 n = TYPE_VECTOR_SUBPARTS (type_out);
5630 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5631 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5632 if (el_mode != in_mode
5633 || n != in_n)
5634 return NULL_TREE;
5636 switch (fn)
5638 CASE_CFN_ATAN2:
5639 CASE_CFN_HYPOT:
5640 CASE_CFN_POW:
5641 n_args = 2;
5642 gcc_fallthrough ();
5644 CASE_CFN_ACOS:
5645 CASE_CFN_ACOSH:
5646 CASE_CFN_ASIN:
5647 CASE_CFN_ASINH:
5648 CASE_CFN_ATAN:
5649 CASE_CFN_ATANH:
5650 CASE_CFN_CBRT:
5651 CASE_CFN_COS:
5652 CASE_CFN_COSH:
5653 CASE_CFN_ERF:
5654 CASE_CFN_ERFC:
5655 CASE_CFN_EXP2:
5656 CASE_CFN_EXP:
5657 CASE_CFN_EXPM1:
5658 CASE_CFN_LGAMMA:
5659 CASE_CFN_LOG10:
5660 CASE_CFN_LOG1P:
5661 CASE_CFN_LOG2:
5662 CASE_CFN_LOG:
5663 CASE_CFN_SIN:
5664 CASE_CFN_SINH:
5665 CASE_CFN_SQRT:
5666 CASE_CFN_TAN:
5667 CASE_CFN_TANH:
5668 if (el_mode == DFmode && n == 2)
5670 bdecl = mathfn_built_in (double_type_node, fn);
5671 suffix = "d2"; /* pow -> powd2 */
5673 else if (el_mode == SFmode && n == 4)
5675 bdecl = mathfn_built_in (float_type_node, fn);
5676 suffix = "4"; /* powf -> powf4 */
5678 else
5679 return NULL_TREE;
5680 if (!bdecl)
5681 return NULL_TREE;
5682 break;
5684 default:
5685 return NULL_TREE;
5688 gcc_assert (suffix != NULL);
5689 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5690 if (!bname)
5691 return NULL_TREE;
5693 strcpy (name, bname + sizeof ("__builtin_") - 1);
5694 strcat (name, suffix);
5696 if (n_args == 1)
5697 fntype = build_function_type_list (type_out, type_in, NULL);
5698 else if (n_args == 2)
5699 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5700 else
5701 gcc_unreachable ();
5703 /* Build a function declaration for the vectorized function. */
5704 new_fndecl = build_decl (BUILTINS_LOCATION,
5705 FUNCTION_DECL, get_identifier (name), fntype);
5706 TREE_PUBLIC (new_fndecl) = 1;
5707 DECL_EXTERNAL (new_fndecl) = 1;
5708 DECL_IS_NOVOPS (new_fndecl) = 1;
5709 TREE_READONLY (new_fndecl) = 1;
5711 return new_fndecl;
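/* Worked example, illustrative only: vectorizing pow for V2DFmode
   finds bdecl == __builtin_pow, so bname == "__builtin_pow";
   strcpy (name, bname + sizeof ("__builtin_") - 1) skips the 10-char
   prefix leaving "pow", and strcat appends "d2", yielding the MASS
   routine name "powd2".  The V4SFmode case maps powf to "powf4".  */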
5714 /* Returns a function decl for a vectorized version of the builtin function
5715 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5716 if it is not available. */
5718 static tree
5719 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5720 tree type_in)
5722 machine_mode in_mode, out_mode;
5723 int in_n, out_n;
5725 if (TARGET_DEBUG_BUILTIN)
5726 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5727 combined_fn_name (combined_fn (fn)),
5728 GET_MODE_NAME (TYPE_MODE (type_out)),
5729 GET_MODE_NAME (TYPE_MODE (type_in)));
5731 if (TREE_CODE (type_out) != VECTOR_TYPE
5732 || TREE_CODE (type_in) != VECTOR_TYPE
5733 || !TARGET_VECTORIZE_BUILTINS)
5734 return NULL_TREE;
5736 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5737 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5738 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5739 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5741 switch (fn)
5743 CASE_CFN_COPYSIGN:
5744 if (VECTOR_UNIT_VSX_P (V2DFmode)
5745 && out_mode == DFmode && out_n == 2
5746 && in_mode == DFmode && in_n == 2)
5747 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5748 if (VECTOR_UNIT_VSX_P (V4SFmode)
5749 && out_mode == SFmode && out_n == 4
5750 && in_mode == SFmode && in_n == 4)
5751 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5752 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5753 && out_mode == SFmode && out_n == 4
5754 && in_mode == SFmode && in_n == 4)
5755 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5756 break;
5757 CASE_CFN_CEIL:
5758 if (VECTOR_UNIT_VSX_P (V2DFmode)
5759 && out_mode == DFmode && out_n == 2
5760 && in_mode == DFmode && in_n == 2)
5761 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5762 if (VECTOR_UNIT_VSX_P (V4SFmode)
5763 && out_mode == SFmode && out_n == 4
5764 && in_mode == SFmode && in_n == 4)
5765 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5766 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5767 && out_mode == SFmode && out_n == 4
5768 && in_mode == SFmode && in_n == 4)
5769 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5770 break;
5771 CASE_CFN_FLOOR:
5772 if (VECTOR_UNIT_VSX_P (V2DFmode)
5773 && out_mode == DFmode && out_n == 2
5774 && in_mode == DFmode && in_n == 2)
5775 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5776 if (VECTOR_UNIT_VSX_P (V4SFmode)
5777 && out_mode == SFmode && out_n == 4
5778 && in_mode == SFmode && in_n == 4)
5779 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5780 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5781 && out_mode == SFmode && out_n == 4
5782 && in_mode == SFmode && in_n == 4)
5783 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5784 break;
5785 CASE_CFN_FMA:
5786 if (VECTOR_UNIT_VSX_P (V2DFmode)
5787 && out_mode == DFmode && out_n == 2
5788 && in_mode == DFmode && in_n == 2)
5789 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5790 if (VECTOR_UNIT_VSX_P (V4SFmode)
5791 && out_mode == SFmode && out_n == 4
5792 && in_mode == SFmode && in_n == 4)
5793 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5794 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5795 && out_mode == SFmode && out_n == 4
5796 && in_mode == SFmode && in_n == 4)
5797 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5798 break;
5799 CASE_CFN_TRUNC:
5800 if (VECTOR_UNIT_VSX_P (V2DFmode)
5801 && out_mode == DFmode && out_n == 2
5802 && in_mode == DFmode && in_n == 2)
5803 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5804 if (VECTOR_UNIT_VSX_P (V4SFmode)
5805 && out_mode == SFmode && out_n == 4
5806 && in_mode == SFmode && in_n == 4)
5807 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5808 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5809 && out_mode == SFmode && out_n == 4
5810 && in_mode == SFmode && in_n == 4)
5811 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5812 break;
5813 CASE_CFN_NEARBYINT:
5814 if (VECTOR_UNIT_VSX_P (V2DFmode)
5815 && flag_unsafe_math_optimizations
5816 && out_mode == DFmode && out_n == 2
5817 && in_mode == DFmode && in_n == 2)
5818 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5819 if (VECTOR_UNIT_VSX_P (V4SFmode)
5820 && flag_unsafe_math_optimizations
5821 && out_mode == SFmode && out_n == 4
5822 && in_mode == SFmode && in_n == 4)
5823 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5824 break;
5825 CASE_CFN_RINT:
5826 if (VECTOR_UNIT_VSX_P (V2DFmode)
5827 && !flag_trapping_math
5828 && out_mode == DFmode && out_n == 2
5829 && in_mode == DFmode && in_n == 2)
5830 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5831 if (VECTOR_UNIT_VSX_P (V4SFmode)
5832 && !flag_trapping_math
5833 && out_mode == SFmode && out_n == 4
5834 && in_mode == SFmode && in_n == 4)
5835 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5836 break;
5837 default:
5838 break;
5841 /* Generate calls to libmass if appropriate. */
5842 if (rs6000_veclib_handler)
5843 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5845 return NULL_TREE;
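/* Illustrative note: e.g. a call to ceil being vectorized for
   V2DFmode (out and in both DFmode x 2) on a VSX target resolves to
   rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP], i.e. the xvrdpip
   round-toward-plus-infinity instruction.  */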
5848 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5850 static tree
5851 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5852 tree type_in)
5854 machine_mode in_mode, out_mode;
5855 int in_n, out_n;
5857 if (TARGET_DEBUG_BUILTIN)
5858 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5859 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5860 GET_MODE_NAME (TYPE_MODE (type_out)),
5861 GET_MODE_NAME (TYPE_MODE (type_in)));
5863 if (TREE_CODE (type_out) != VECTOR_TYPE
5864 || TREE_CODE (type_in) != VECTOR_TYPE
5865 || !TARGET_VECTORIZE_BUILTINS)
5866 return NULL_TREE;
5868 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5869 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5870 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5871 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5873 enum rs6000_builtins fn
5874 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5875 switch (fn)
5877 case RS6000_BUILTIN_RSQRTF:
5878 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5879 && out_mode == SFmode && out_n == 4
5880 && in_mode == SFmode && in_n == 4)
5881 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5882 break;
5883 case RS6000_BUILTIN_RSQRT:
5884 if (VECTOR_UNIT_VSX_P (V2DFmode)
5885 && out_mode == DFmode && out_n == 2
5886 && in_mode == DFmode && in_n == 2)
5887 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5888 break;
5889 case RS6000_BUILTIN_RECIPF:
5890 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5891 && out_mode == SFmode && out_n == 4
5892 && in_mode == SFmode && in_n == 4)
5893 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5894 break;
5895 case RS6000_BUILTIN_RECIP:
5896 if (VECTOR_UNIT_VSX_P (V2DFmode)
5897 && out_mode == DFmode && out_n == 2
5898 && in_mode == DFmode && in_n == 2)
5899 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5900 break;
5901 default:
5902 break;
5904 return NULL_TREE;
5907 /* Default CPU string for rs6000*_file_start functions. */
5908 static const char *rs6000_default_cpu;
5910 /* Do anything needed at the start of the asm file. */
5912 static void
5913 rs6000_file_start (void)
5915 char buffer[80];
5916 const char *start = buffer;
5917 FILE *file = asm_out_file;
5919 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5921 default_file_start ();
5923 if (flag_verbose_asm)
5925 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5927 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5929 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5930 start = "";
5933 if (global_options_set.x_rs6000_cpu_index)
5935 fprintf (file, "%s -mcpu=%s", start,
5936 processor_target_table[rs6000_cpu_index].name);
5937 start = "";
5940 if (global_options_set.x_rs6000_tune_index)
5942 fprintf (file, "%s -mtune=%s", start,
5943 processor_target_table[rs6000_tune_index].name);
5944 start = "";
5947 if (PPC405_ERRATUM77)
5949 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5950 start = "";
5953 #ifdef USING_ELFOS_H
5954 switch (rs6000_sdata)
5956 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5957 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5958 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5959 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5962 if (rs6000_sdata && g_switch_value)
5964 fprintf (file, "%s -G %d", start,
5965 g_switch_value);
5966 start = "";
5968 #endif
5970 if (*start == '\0')
5971 putc ('\n', file);
5974 #ifdef USING_ELFOS_H
5975 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5976 && !global_options_set.x_rs6000_cpu_index)
5978 fputs ("\t.machine ", asm_out_file);
5979 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5980 fputs ("power9\n", asm_out_file);
5981 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5982 fputs ("power8\n", asm_out_file);
5983 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5984 fputs ("power7\n", asm_out_file);
5985 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5986 fputs ("power6\n", asm_out_file);
5987 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5988 fputs ("power5\n", asm_out_file);
5989 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5990 fputs ("power4\n", asm_out_file);
5991 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5992 fputs ("ppc64\n", asm_out_file);
5993 else
5994 fputs ("ppc\n", asm_out_file);
5996 #endif
5998 if (DEFAULT_ABI == ABI_ELFv2)
5999 fprintf (file, "\t.abiversion 2\n");
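/* Illustrative output, based on the code above (exact spelling depends
   on the target configuration): with -fverbose-asm -mcpu=power8 on an
   ELF target the file begins with something like

       # rs6000/powerpc options: -mcpu=power8

   while a compiler configured without a default cpu and invoked
   without -mcpu instead selects a .machine directive from the ISA
   flags, e.g. ".machine power8" when OPTION_MASK_DIRECT_MOVE is set
   and no newer flag is.  */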
6003 /* Return nonzero if this function is known to have a null epilogue. */
6006 direct_return (void)
6008 if (reload_completed)
6010 rs6000_stack_t *info = rs6000_stack_info ();
6012 if (info->first_gp_reg_save == 32
6013 && info->first_fp_reg_save == 64
6014 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6015 && ! info->lr_save_p
6016 && ! info->cr_save_p
6017 && info->vrsave_size == 0
6018 && ! info->push_p)
6019 return 1;
6022 return 0;
6025 /* Return the number of instructions it takes to form a constant in an
6026 integer register. */
6029 num_insns_constant_wide (HOST_WIDE_INT value)
6031 /* signed constant loadable with addi */
6032 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6033 return 1;
6035 /* constant loadable with addis */
6036 else if ((value & 0xffff) == 0
6037 && (value >> 31 == -1 || value >> 31 == 0))
6038 return 1;
6040 else if (TARGET_POWERPC64)
6042 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6043 HOST_WIDE_INT high = value >> 31;
6045 if (high == 0 || high == -1)
6046 return 2;
6048 high >>= 1;
6050 if (low == 0)
6051 return num_insns_constant_wide (high) + 1;
6052 else if (high == 0)
6053 return num_insns_constant_wide (low) + 1;
6054 else
6055 return (num_insns_constant_wide (high)
6056 + num_insns_constant_wide (low) + 1);
6059 else
6060 return 2;
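/* Worked examples, illustrative only, for num_insns_constant_wide on
   a 64-bit target:

     0x7fff             -> 1  // fits addi's signed 16-bit immediate
     0x12340000         -> 1  // low half zero, loadable with addis
     0x12345678         -> 2  // addis + ori
     0x123456789abcdef0 -> 5  // 2 (high) + 2 (low) + 1 (shift/merge)

   The last case is the classic five-instruction 64-bit constant
   sequence.  */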
6064 num_insns_constant (rtx op, machine_mode mode)
6066 HOST_WIDE_INT low, high;
6068 switch (GET_CODE (op))
6070 case CONST_INT:
6071 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6072 && rs6000_is_valid_and_mask (op, mode))
6073 return 2;
6074 else
6075 return num_insns_constant_wide (INTVAL (op));
6077 case CONST_WIDE_INT:
6079 int i;
6080 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6081 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6082 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6083 return ins;
6086 case CONST_DOUBLE:
6087 if (mode == SFmode || mode == SDmode)
6089 long l;
6091 if (DECIMAL_FLOAT_MODE_P (mode))
6092 REAL_VALUE_TO_TARGET_DECIMAL32
6093 (*CONST_DOUBLE_REAL_VALUE (op), l);
6094 else
6095 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6096 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6099 long l[2];
6100 if (DECIMAL_FLOAT_MODE_P (mode))
6101 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6102 else
6103 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6104 high = l[WORDS_BIG_ENDIAN == 0];
6105 low = l[WORDS_BIG_ENDIAN != 0];
6107 if (TARGET_32BIT)
6108 return (num_insns_constant_wide (low)
6109 + num_insns_constant_wide (high));
6110 else
6112 if ((high == 0 && low >= 0)
6113 || (high == -1 && low < 0))
6114 return num_insns_constant_wide (low);
6116 else if (rs6000_is_valid_and_mask (op, mode))
6117 return 2;
6119 else if (low == 0)
6120 return num_insns_constant_wide (high) + 1;
6122 else
6123 return (num_insns_constant_wide (high)
6124 + num_insns_constant_wide (low) + 1);
6127 default:
6128 gcc_unreachable ();
6132 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6133 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6134 corresponding element of the vector, but for V4SFmode and V2SFmode,
6135 the corresponding "float" is interpreted as an SImode integer. */
6137 HOST_WIDE_INT
6138 const_vector_elt_as_int (rtx op, unsigned int elt)
6140 rtx tmp;
6142 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6143 gcc_assert (GET_MODE (op) != V2DImode
6144 && GET_MODE (op) != V2DFmode);
6146 tmp = CONST_VECTOR_ELT (op, elt);
6147 if (GET_MODE (op) == V4SFmode
6148 || GET_MODE (op) == V2SFmode)
6149 tmp = gen_lowpart (SImode, tmp);
6150 return INTVAL (tmp);
6153 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6154 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6155 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6156 all items are set to the same value and contain COPIES replicas of the
6157 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6158 operand and the others are set to the value of the operand's msb. */
6160 static bool
6161 vspltis_constant (rtx op, unsigned step, unsigned copies)
6163 machine_mode mode = GET_MODE (op);
6164 machine_mode inner = GET_MODE_INNER (mode);
6166 unsigned i;
6167 unsigned nunits;
6168 unsigned bitsize;
6169 unsigned mask;
6171 HOST_WIDE_INT val;
6172 HOST_WIDE_INT splat_val;
6173 HOST_WIDE_INT msb_val;
6175 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6176 return false;
6178 nunits = GET_MODE_NUNITS (mode);
6179 bitsize = GET_MODE_BITSIZE (inner);
6180 mask = GET_MODE_MASK (inner);
6182 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6183 splat_val = val;
6184 msb_val = val >= 0 ? 0 : -1;
6186 /* Construct the value to be splatted, if possible. If not, return 0. */
6187 for (i = 2; i <= copies; i *= 2)
6189 HOST_WIDE_INT small_val;
6190 bitsize /= 2;
6191 small_val = splat_val >> bitsize;
6192 mask >>= bitsize;
6193 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6194 return false;
6195 splat_val = small_val;
6198 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6199 if (EASY_VECTOR_15 (splat_val))
6202 /* Also check if we can splat, and then add the result to itself.  Do so if
6203 the value is positive, or if the splat instruction is using OP's mode;
6204 for splat_val < 0, the splat and the add should use the same mode. */
6205 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6206 && (splat_val >= 0 || (step == 1 && copies == 1)))
6209 /* Also check if we are loading up the most significant bit, which can be
6210 done by loading up -1 and shifting the value left by -1. */
6211 else if (EASY_VECTOR_MSB (splat_val, inner))
6214 else
6215 return false;
6217 /* Check if VAL is present in every STEP-th element, and the
6218 other elements are filled with its most significant bit. */
6219 for (i = 1; i < nunits; ++i)
6221 HOST_WIDE_INT desired_val;
6222 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6223 if ((i & (step - 1)) == 0)
6224 desired_val = val;
6225 else
6226 desired_val = msb_val;
6228 if (desired_val != const_vector_elt_as_int (op, elt))
6229 return false;
6232 return true;
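/* Worked example, illustrative only: a V4SImode constant whose every
   element is 0x05050505 is accepted with step == 1, copies == 4.  The
   copies loop folds the splat value 0x05050505 -> 0x0505 -> 0x05,
   EASY_VECTOR_15 (5) holds, and all elements match, so the constant
   can be generated with a single vspltisb 5.  */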
6235 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6236 instruction, filling in the bottom elements with 0 or -1.
6238 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6239 for the number of zeroes to shift in, or negative for the number of 0xff
6240 bytes to shift in.
6242 OP is a CONST_VECTOR. */
6245 vspltis_shifted (rtx op)
6247 machine_mode mode = GET_MODE (op);
6248 machine_mode inner = GET_MODE_INNER (mode);
6250 unsigned i, j;
6251 unsigned nunits;
6252 unsigned mask;
6254 HOST_WIDE_INT val;
6256 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6257 return 0;
6259 /* We need to create pseudo registers to do the shift, so don't recognize
6260 shift vector constants after reload. */
6261 if (!can_create_pseudo_p ())
6262 return 0;
6264 nunits = GET_MODE_NUNITS (mode);
6265 mask = GET_MODE_MASK (inner);
6267 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6269 /* Check if the value can really be the operand of a vspltis[bhw]. */
6270 if (EASY_VECTOR_15 (val))
6273 /* Also check if we are loading up the most significant bit, which can be done
6274 by loading up -1 and shifting the value left by -1. */
6275 else if (EASY_VECTOR_MSB (val, inner))
6278 else
6279 return 0;
6281 /* Check if VAL is present in every STEP-th element until we find elements
6282 that are 0 or all 1 bits. */
6283 for (i = 1; i < nunits; ++i)
6285 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6286 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6288 /* If the value isn't the splat value, check for the remaining elements
6289 being 0/-1. */
6290 if (val != elt_val)
6292 if (elt_val == 0)
6294 for (j = i+1; j < nunits; ++j)
6296 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6297 if (const_vector_elt_as_int (op, elt2) != 0)
6298 return 0;
6301 return (nunits - i) * GET_MODE_SIZE (inner);
6304 else if ((elt_val & mask) == mask)
6306 for (j = i+1; j < nunits; ++j)
6308 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6309 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6310 return 0;
6313 return -((nunits - i) * GET_MODE_SIZE (inner));
6316 else
6317 return 0;
6321 /* If all elements are equal, we don't need to do VSLDOI. */
6322 return 0;
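/* Worked example, illustrative only: on a big-endian target the
   V4SImode constant { 5, 0, 0, 0 } splats val == 5 from element 0 and
   finds the remaining three elements zero, so it returns
   (4 - 1) * 4 == 12, meaning vspltisw 5 followed by a VSLDOI that
   shifts in 12 zero bytes.  */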
6326 /* Return true if OP is of the given MODE and can be synthesized
6327 with a vspltisb, vspltish or vspltisw. */
6329 bool
6330 easy_altivec_constant (rtx op, machine_mode mode)
6332 unsigned step, copies;
6334 if (mode == VOIDmode)
6335 mode = GET_MODE (op);
6336 else if (mode != GET_MODE (op))
6337 return false;
6339 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6340 constants. */
6341 if (mode == V2DFmode)
6342 return zero_constant (op, mode);
6344 else if (mode == V2DImode)
6346 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6347 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6348 return false;
6350 if (zero_constant (op, mode))
6351 return true;
6353 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6354 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6355 return true;
6357 return false;
6360 /* V1TImode is a special container for TImode. Ignore for now. */
6361 else if (mode == V1TImode)
6362 return false;
6364 /* Start with a vspltisw. */
6365 step = GET_MODE_NUNITS (mode) / 4;
6366 copies = 1;
6368 if (vspltis_constant (op, step, copies))
6369 return true;
6371 /* Then try with a vspltish. */
6372 if (step == 1)
6373 copies <<= 1;
6374 else
6375 step >>= 1;
6377 if (vspltis_constant (op, step, copies))
6378 return true;
6380 /* And finally a vspltisb. */
6381 if (step == 1)
6382 copies <<= 1;
6383 else
6384 step >>= 1;
6386 if (vspltis_constant (op, step, copies))
6387 return true;
6389 if (vspltis_shifted (op) != 0)
6390 return true;
6392 return false;
6395 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6396 result is OP. Abort if it is not possible. */
6399 gen_easy_altivec_constant (rtx op)
6401 machine_mode mode = GET_MODE (op);
6402 int nunits = GET_MODE_NUNITS (mode);
6403 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6404 unsigned step = nunits / 4;
6405 unsigned copies = 1;
6407 /* Start with a vspltisw. */
6408 if (vspltis_constant (op, step, copies))
6409 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6411 /* Then try with a vspltish. */
6412 if (step == 1)
6413 copies <<= 1;
6414 else
6415 step >>= 1;
6417 if (vspltis_constant (op, step, copies))
6418 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6420 /* And finally a vspltisb. */
6421 if (step == 1)
6422 copies <<= 1;
6423 else
6424 step >>= 1;
6426 if (vspltis_constant (op, step, copies))
6427 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6429 gcc_unreachable ();
6432 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6433 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6435 Return the number of instructions needed (1 or 2) in the location pointed
6436 to by NUM_INSNS_PTR.
6438 Return the constant being splatted via CONSTANT_PTR. */
6440 bool
6441 xxspltib_constant_p (rtx op,
6442 machine_mode mode,
6443 int *num_insns_ptr,
6444 int *constant_ptr)
6446 size_t nunits = GET_MODE_NUNITS (mode);
6447 size_t i;
6448 HOST_WIDE_INT value;
6449 rtx element;
6451 /* Set the returned values to out-of-range values. */
6452 *num_insns_ptr = -1;
6453 *constant_ptr = 256;
6455 if (!TARGET_P9_VECTOR)
6456 return false;
6458 if (mode == VOIDmode)
6459 mode = GET_MODE (op);
6461 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6462 return false;
6464 /* Handle (vec_duplicate <constant>). */
6465 if (GET_CODE (op) == VEC_DUPLICATE)
6467 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6468 && mode != V2DImode)
6469 return false;
6471 element = XEXP (op, 0);
6472 if (!CONST_INT_P (element))
6473 return false;
6475 value = INTVAL (element);
6476 if (!IN_RANGE (value, -128, 127))
6477 return false;
6480 /* Handle (const_vector [...]). */
6481 else if (GET_CODE (op) == CONST_VECTOR)
6483 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6484 && mode != V2DImode)
6485 return false;
6487 element = CONST_VECTOR_ELT (op, 0);
6488 if (!CONST_INT_P (element))
6489 return false;
6491 value = INTVAL (element);
6492 if (!IN_RANGE (value, -128, 127))
6493 return false;
6495 for (i = 1; i < nunits; i++)
6497 element = CONST_VECTOR_ELT (op, i);
6498 if (!CONST_INT_P (element))
6499 return false;
6501 if (value != INTVAL (element))
6502 return false;
6506 /* Handle integer constants being loaded into the upper part of the VSX
6507 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6508 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6509 else if (CONST_INT_P (op))
6511 if (!SCALAR_INT_MODE_P (mode))
6512 return false;
6514 value = INTVAL (op);
6515 if (!IN_RANGE (value, -128, 127))
6516 return false;
6518 if (!IN_RANGE (value, -1, 0))
6520 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6521 return false;
6523 if (EASY_VECTOR_15 (value))
6524 return false;
6528 else
6529 return false;
6531 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6532 sign extend. Special case 0/-1 to allow getting any VSX register instead
6533 of an Altivec register. */
6534 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6535 && EASY_VECTOR_15 (value))
6536 return false;
6538 /* Return # of instructions and the constant byte for XXSPLTIB. */
6539 if (mode == V16QImode)
6540 *num_insns_ptr = 1;
6542 else if (IN_RANGE (value, -1, 0))
6543 *num_insns_ptr = 1;
6545 else
6546 *num_insns_ptr = 2;
6548 *constant_ptr = (int) value;
6549 return true;
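/* Worked examples, illustrative only, with TARGET_P9_VECTOR:

     V16QImode splat of 20  -> true,  *num_insns_ptr == 1  // xxspltib
     V4SImode  splat of 100 -> true,  *num_insns_ptr == 2  // + vextsb2w
     V4SImode  splat of 5   -> false  // EASY_VECTOR_15: vspltisw wins  */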
6552 const char *
6553 output_vec_const_move (rtx *operands)
6555 int cst, cst2, shift;
6556 machine_mode mode;
6557 rtx dest, vec;
6559 dest = operands[0];
6560 vec = operands[1];
6561 mode = GET_MODE (dest);
6563 if (TARGET_VSX)
6565 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6566 int xxspltib_value = 256;
6567 int num_insns = -1;
6569 if (zero_constant (vec, mode))
6571 if (TARGET_P9_VECTOR)
6572 return "xxspltib %x0,0";
6574 else if (dest_vmx_p)
6575 return "vspltisw %0,0";
6577 else
6578 return "xxlxor %x0,%x0,%x0";
6581 if (all_ones_constant (vec, mode))
6583 if (TARGET_P9_VECTOR)
6584 return "xxspltib %x0,255";
6586 else if (dest_vmx_p)
6587 return "vspltisw %0,-1";
6589 else if (TARGET_P8_VECTOR)
6590 return "xxlorc %x0,%x0,%x0";
6592 else
6593 gcc_unreachable ();
6596 if (TARGET_P9_VECTOR
6597 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6599 if (num_insns == 1)
6601 operands[2] = GEN_INT (xxspltib_value & 0xff);
6602 return "xxspltib %x0,%2";
6605 return "#";
6609 if (TARGET_ALTIVEC)
6611 rtx splat_vec;
6613 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6614 if (zero_constant (vec, mode))
6615 return "vspltisw %0,0";
6617 if (all_ones_constant (vec, mode))
6618 return "vspltisw %0,-1";
6620 /* Do we need to construct a value using VSLDOI? */
6621 shift = vspltis_shifted (vec);
6622 if (shift != 0)
6623 return "#";
6625 splat_vec = gen_easy_altivec_constant (vec);
6626 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6627 operands[1] = XEXP (splat_vec, 0);
6628 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6629 return "#";
6631 switch (GET_MODE (splat_vec))
6633 case V4SImode:
6634 return "vspltisw %0,%1";
6636 case V8HImode:
6637 return "vspltish %0,%1";
6639 case V16QImode:
6640 return "vspltisb %0,%1";
6642 default:
6643 gcc_unreachable ();
6647 gcc_assert (TARGET_SPE);
6649 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6650 pattern of V1DI, V4HI, and V2SF.
6652 FIXME: We should probably return # and add post reload
6653 splitters for these, but this way is so easy ;-). */
6654 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6655 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6656 operands[1] = CONST_VECTOR_ELT (vec, 0);
6657 operands[2] = CONST_VECTOR_ELT (vec, 1);
6658 if (cst == cst2)
6659 return "li %0,%1\n\tevmergelo %0,%0,%0";
6660 else if (WORDS_BIG_ENDIAN)
6661 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6662 else
6663 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6666 /* Initialize TARGET of vector PAIRED to VALS. */
6668 void
6669 paired_expand_vector_init (rtx target, rtx vals)
6671 machine_mode mode = GET_MODE (target);
6672 int n_elts = GET_MODE_NUNITS (mode);
6673 int n_var = 0;
6674 rtx x, new_rtx, tmp, constant_op, op1, op2;
6675 int i;
6677 for (i = 0; i < n_elts; ++i)
6679 x = XVECEXP (vals, 0, i);
6680 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6681 ++n_var;
6683 if (n_var == 0)
6685 /* Load from constant pool. */
6686 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6687 return;
6690 if (n_var == 2)
6692 /* The vector is initialized only with non-constants. */
6693 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6694 XVECEXP (vals, 0, 1));
6696 emit_move_insn (target, new_rtx);
6697 return;
6700 /* One field is non-constant and the other one is a constant.  Load the
6701 constant from the constant pool and use the ps_merge instruction to
6702 construct the whole vector. */
6703 op1 = XVECEXP (vals, 0, 0);
6704 op2 = XVECEXP (vals, 0, 1);
6706 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6708 tmp = gen_reg_rtx (GET_MODE (constant_op));
6709 emit_move_insn (tmp, constant_op);
6711 if (CONSTANT_P (op1))
6712 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6713 else
6714 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6716 emit_move_insn (target, new_rtx);
6719 void
6720 paired_expand_vector_move (rtx operands[])
6722 rtx op0 = operands[0], op1 = operands[1];
6724 emit_move_insn (op0, op1);
6727 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6728 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6729 operands for the relation operation COND. This is a recursive
6730 function. */
6732 static void
6733 paired_emit_vector_compare (enum rtx_code rcode,
6734 rtx dest, rtx op0, rtx op1,
6735 rtx cc_op0, rtx cc_op1)
6737 rtx tmp = gen_reg_rtx (V2SFmode);
6738 rtx tmp1, max, min;
6740 gcc_assert (TARGET_PAIRED_FLOAT);
6741 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6743 switch (rcode)
6745 case LT:
6746 case LTU:
6747 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6748 return;
6749 case GE:
6750 case GEU:
6751 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6752 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6753 return;
6754 case LE:
6755 case LEU:
6756 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6757 return;
6758 case GT:
6759 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6760 return;
6761 case EQ:
6762 tmp1 = gen_reg_rtx (V2SFmode);
6763 max = gen_reg_rtx (V2SFmode);
6764 min = gen_reg_rtx (V2SFmode);
6765 gen_reg_rtx (V2SFmode);
6767 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6768 emit_insn (gen_selv2sf4
6769 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6770 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6771 emit_insn (gen_selv2sf4
6772 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6773 emit_insn (gen_subv2sf3 (tmp1, min, max));
6774 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6775 return;
6776 case NE:
6777 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6778 return;
6779 case UNLE:
6780 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6781 return;
6782 case UNLT:
6783 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6784 return;
6785 case UNGE:
6786 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6787 return;
6788 case UNGT:
6789 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6790 return;
6791 default:
6792 gcc_unreachable ();
6795 return;
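/* Illustrative note: the only primitive actually emitted above is the
   GE case (a ps_sub followed by a ps_sel on the sign of the
   difference); every other predicate is rewritten in terms of it.
   For example LT (x, y) ? a : b is handled as GE (x, y) ? b : a with
   the select arms swapped, and EQ composes two GE selects to compare
   min and max.  */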
6798 /* Emit vector conditional expression.
6799 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6800 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6803 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6804 rtx cond, rtx cc_op0, rtx cc_op1)
6806 enum rtx_code rcode = GET_CODE (cond);
6808 if (!TARGET_PAIRED_FLOAT)
6809 return 0;
6811 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6813 return 1;
6816 /* Initialize vector TARGET to VALS. */
6818 void
6819 rs6000_expand_vector_init (rtx target, rtx vals)
6821 machine_mode mode = GET_MODE (target);
6822 machine_mode inner_mode = GET_MODE_INNER (mode);
6823 int n_elts = GET_MODE_NUNITS (mode);
6824 int n_var = 0, one_var = -1;
6825 bool all_same = true, all_const_zero = true;
6826 rtx x, mem;
6827 int i;
6829 for (i = 0; i < n_elts; ++i)
6831 x = XVECEXP (vals, 0, i);
6832 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6833 ++n_var, one_var = i;
6834 else if (x != CONST0_RTX (inner_mode))
6835 all_const_zero = false;
6837 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6838 all_same = false;
6841 if (n_var == 0)
6843 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6844 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6845 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6847 /* Zero register. */
6848 emit_move_insn (target, CONST0_RTX (mode));
6849 return;
6851 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6853 /* Splat immediate. */
6854 emit_insn (gen_rtx_SET (target, const_vec));
6855 return;
6857 else
6859 /* Load from constant pool. */
6860 emit_move_insn (target, const_vec);
6861 return;
6865 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6866 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6868 rtx op0 = XVECEXP (vals, 0, 0);
6869 rtx op1 = XVECEXP (vals, 0, 1);
6870 if (all_same)
6872 if (!MEM_P (op0) && !REG_P (op0))
6873 op0 = force_reg (inner_mode, op0);
6874 if (mode == V2DFmode)
6875 emit_insn (gen_vsx_splat_v2df (target, op0));
6876 else
6877 emit_insn (gen_vsx_splat_v2di (target, op0));
6879 else
6881 op0 = force_reg (inner_mode, op0);
6882 op1 = force_reg (inner_mode, op1);
6883 if (mode == V2DFmode)
6884 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6885 else
6886 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6888 return;
6891 /* Special case initializing vector int if we are on 64-bit systems with
6892 direct move or we have the ISA 3.0 instructions. */
6893 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6894 && TARGET_DIRECT_MOVE_64BIT)
6896 if (all_same)
6898 rtx element0 = XVECEXP (vals, 0, 0);
6899 if (MEM_P (element0))
6900 element0 = rs6000_address_for_fpconvert (element0);
6901 else
6902 element0 = force_reg (SImode, element0);
6904 if (TARGET_P9_VECTOR)
6905 emit_insn (gen_vsx_splat_v4si (target, element0));
6906 else
6908 rtx tmp = gen_reg_rtx (DImode);
6909 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6910 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6912 return;
6914 else
6916 rtx elements[4];
6917 size_t i;
6919 for (i = 0; i < 4; i++)
6921 elements[i] = XVECEXP (vals, 0, i);
6922 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6923 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6926 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6927 elements[2], elements[3]));
6928 return;
6932 /* With single-precision floating point on VSX, we know that internally
6933 single precision is actually represented as a double, so either build
6934 2 V2DF vectors and convert these vectors to single precision, or do
6935 one conversion and splat the result to the other elements. */
6936 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6938 if (all_same)
6940 rtx element0 = XVECEXP (vals, 0, 0);
6942 if (TARGET_P9_VECTOR)
6944 if (MEM_P (element0))
6945 element0 = rs6000_address_for_fpconvert (element0);
6947 emit_insn (gen_vsx_splat_v4sf (target, element0));
6950 else
6952 rtx freg = gen_reg_rtx (V4SFmode);
6953 rtx sreg = force_reg (SFmode, element0);
6954 rtx cvt = (TARGET_XSCVDPSPN
6955 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6956 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6958 emit_insn (cvt);
6959 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6960 const0_rtx));
6963 else
6965 rtx dbl_even = gen_reg_rtx (V2DFmode);
6966 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6967 rtx flt_even = gen_reg_rtx (V4SFmode);
6968 rtx flt_odd = gen_reg_rtx (V4SFmode);
6969 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6970 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6971 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6972 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6974 /* Use VMRGEW if we can instead of doing a permute. */
6975 if (TARGET_P8_VECTOR)
6977 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6978 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6979 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6980 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6981 if (BYTES_BIG_ENDIAN)
6982 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6983 else
6984 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6986 else
6988 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6989 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6990 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6991 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6992 rs6000_expand_extract_even (target, flt_even, flt_odd);
6995 return;
6998 /* Special case initializing vector short/char that are splats if we are on
6999 64-bit systems with direct move. */
7000 if (all_same && TARGET_DIRECT_MOVE_64BIT
7001 && (mode == V16QImode || mode == V8HImode))
7003 rtx op0 = XVECEXP (vals, 0, 0);
7004 rtx di_tmp = gen_reg_rtx (DImode);
7006 if (!REG_P (op0))
7007 op0 = force_reg (GET_MODE_INNER (mode), op0);
7009 if (mode == V16QImode)
7011 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7012 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7013 return;
7016 if (mode == V8HImode)
7018 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7019 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7020 return;
7024 /* Store value to stack temp. Load vector element. Splat. However, splat
7025 of 64-bit items is not supported on Altivec. */
7026 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7028 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7029 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7030 XVECEXP (vals, 0, 0));
7031 x = gen_rtx_UNSPEC (VOIDmode,
7032 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7033 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7034 gen_rtvec (2,
7035 gen_rtx_SET (target, mem),
7036 x)));
7037 x = gen_rtx_VEC_SELECT (inner_mode, target,
7038 gen_rtx_PARALLEL (VOIDmode,
7039 gen_rtvec (1, const0_rtx)));
7040 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7041 return;
7044 /* One field is non-constant. Load constant then overwrite
7045 varying field. */
7046 if (n_var == 1)
7048 rtx copy = copy_rtx (vals);
7050 /* Load constant part of vector, substitute neighboring value for
7051 varying element. */
7052 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7053 rs6000_expand_vector_init (target, copy);
7055 /* Insert variable. */
7056 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7057 return;
7060 /* Construct the vector in memory one field at a time
7061 and load the whole vector. */
7062 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7063 for (i = 0; i < n_elts; i++)
7064 emit_move_insn (adjust_address_nv (mem, inner_mode,
7065 i * GET_MODE_SIZE (inner_mode)),
7066 XVECEXP (vals, 0, i));
7067 emit_move_insn (target, mem);
7070 /* Set field ELT of TARGET to VAL. */
7072 void
7073 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7075 machine_mode mode = GET_MODE (target);
7076 machine_mode inner_mode = GET_MODE_INNER (mode);
7077 rtx reg = gen_reg_rtx (mode);
7078 rtx mask, mem, x;
7079 int width = GET_MODE_SIZE (inner_mode);
7080 int i;
7082 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7084 rtx (*set_func) (rtx, rtx, rtx, rtx)
7085 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
7086 emit_insn (set_func (target, target, val, GEN_INT (elt)));
7087 return;
7090 /* Simplify setting single element vectors like V1TImode. */
7091 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7093 emit_move_insn (target, gen_lowpart (mode, val));
7094 return;
7097 /* Load single variable value. */
7098 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7099 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7100 x = gen_rtx_UNSPEC (VOIDmode,
7101 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7102 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7103 gen_rtvec (2,
7104 gen_rtx_SET (reg, mem),
7105 x)));
7107 /* Linear sequence. */
7108 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7109 for (i = 0; i < 16; ++i)
7110 XVECEXP (mask, 0, i) = GEN_INT (i);
7112 /* Set permute mask to insert element into target. */
7113 for (i = 0; i < width; ++i)
7114 XVECEXP (mask, 0, elt*width + i)
7115 = GEN_INT (i + 0x10);
7116 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
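/* Worked example (added for exposition): for a V4SImode TARGET and
ELT == 1, the selector built above is
{ 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 },
so a big-endian VPERM copies bytes 4..7 of the result from the second
input (REG, which holds VAL) and every other byte from TARGET. */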
7118 if (BYTES_BIG_ENDIAN)
7119 x = gen_rtx_UNSPEC (mode,
7120 gen_rtvec (3, target, reg,
7121 force_reg (V16QImode, x)),
7122 UNSPEC_VPERM);
7123 else
7125 if (TARGET_P9_VECTOR)
7126 x = gen_rtx_UNSPEC (mode,
7127 gen_rtvec (3, target, reg,
7128 force_reg (V16QImode, x)),
7129 UNSPEC_VPERMR);
7130 else
7132 /* Invert selector. We prefer to generate VNAND on P8 so
7133 that future fusion opportunities can kick in, but must
7134 generate VNOR elsewhere. */
7135 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7136 rtx iorx = (TARGET_P8_VECTOR
7137 ? gen_rtx_IOR (V16QImode, notx, notx)
7138 : gen_rtx_AND (V16QImode, notx, notx));
7139 rtx tmp = gen_reg_rtx (V16QImode);
7140 emit_insn (gen_rtx_SET (tmp, iorx));
7142 /* Permute with operands reversed and adjusted selector. */
7143 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7144 UNSPEC_VPERM);
7148 emit_insn (gen_rtx_SET (target, x));
7151 /* Extract field ELT from VEC into TARGET. */
7153 void
7154 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7156 machine_mode mode = GET_MODE (vec);
7157 machine_mode inner_mode = GET_MODE_INNER (mode);
7158 rtx mem;
7160 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7162 switch (mode)
7164 default:
7165 break;
7166 case V1TImode:
7167 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7168 emit_move_insn (target, gen_lowpart (TImode, vec));
7169 break;
7170 case V2DFmode:
7171 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7172 return;
7173 case V2DImode:
7174 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7175 return;
7176 case V4SFmode:
7177 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7178 return;
7179 case V16QImode:
7180 if (TARGET_DIRECT_MOVE_64BIT)
7182 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7183 return;
7185 else
7186 break;
7187 case V8HImode:
7188 if (TARGET_DIRECT_MOVE_64BIT)
7190 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7191 return;
7193 else
7194 break;
7195 case V4SImode:
7196 if (TARGET_DIRECT_MOVE_64BIT)
7198 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7199 return;
7201 break;
7204 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7205 && TARGET_DIRECT_MOVE_64BIT)
7207 if (GET_MODE (elt) != DImode)
7209 rtx tmp = gen_reg_rtx (DImode);
7210 convert_move (tmp, elt, 0);
7211 elt = tmp;
7214 switch (mode)
7216 case V2DFmode:
7217 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7218 return;
7220 case V2DImode:
7221 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7222 return;
7224 case V4SFmode:
7225 if (TARGET_UPPER_REGS_SF)
7227 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7228 return;
7230 break;
7232 case V4SImode:
7233 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7234 return;
7236 case V8HImode:
7237 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7238 return;
7240 case V16QImode:
7241 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7242 return;
7244 default:
7245 gcc_unreachable ();
7249 gcc_assert (CONST_INT_P (elt));
7251 /* Allocate mode-sized buffer. */
7252 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7254 emit_move_insn (mem, vec);
7256 /* Add offset to field within buffer matching vector element. */
7257 mem = adjust_address_nv (mem, inner_mode,
7258 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7260 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7263 /* Helper function to return the register number of an RTX. */
7264 static inline int
7265 regno_or_subregno (rtx op)
7267 if (REG_P (op))
7268 return REGNO (op);
7269 else if (SUBREG_P (op))
7270 return subreg_regno (op);
7271 else
7272 gcc_unreachable ();
7275 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7276 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7277 temporary (BASE_TMP) to fix up the address. Return the new memory address
7278 that is valid for reads or writes to a given register (SCALAR_REG). */
7280 rtx
7281 rs6000_adjust_vec_address (rtx scalar_reg,
7282 rtx mem,
7283 rtx element,
7284 rtx base_tmp,
7285 machine_mode scalar_mode)
7287 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7288 rtx addr = XEXP (mem, 0);
7289 rtx element_offset;
7290 rtx new_addr;
7291 bool valid_addr_p;
7293 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7294 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7296 /* Calculate what we need to add to the address to get the element
7297 address. */
7298 if (CONST_INT_P (element))
7299 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7300 else
7302 int byte_shift = exact_log2 (scalar_size);
7303 gcc_assert (byte_shift >= 0);
7305 if (byte_shift == 0)
7306 element_offset = element;
7308 else
7310 if (TARGET_POWERPC64)
7311 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7312 else
7313 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7315 element_offset = base_tmp;
7319 /* Create the new address pointing to the element within the vector. If we
7320 are adding 0, we don't have to change the address. */
7321 if (element_offset == const0_rtx)
7322 new_addr = addr;
7324 /* A simple indirect address can be converted into a reg + offset
7325 address. */
7326 else if (REG_P (addr) || SUBREG_P (addr))
7327 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7329 /* Optimize D-FORM addresses with a constant offset and a constant element,
7330 folding the element offset into the address directly. */
7331 else if (GET_CODE (addr) == PLUS)
7333 rtx op0 = XEXP (addr, 0);
7334 rtx op1 = XEXP (addr, 1);
7335 rtx insn;
7337 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7338 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7340 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7341 rtx offset_rtx = GEN_INT (offset);
7343 if (IN_RANGE (offset, -32768, 32767)
7344 && (scalar_size < 8 || (offset & 0x3) == 0))
7345 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7346 else
7348 emit_move_insn (base_tmp, offset_rtx);
7349 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7352 else
7354 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7355 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7357 /* Note, ADDI requires the register being added to be a base
7358 register. If the register was R0, load it up into the temporary
7359 and do the add. */
7360 if (op1_reg_p
7361 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7363 insn = gen_add3_insn (base_tmp, op1, element_offset);
7364 gcc_assert (insn != NULL_RTX);
7365 emit_insn (insn);
7368 else if (ele_reg_p
7369 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7371 insn = gen_add3_insn (base_tmp, element_offset, op1);
7372 gcc_assert (insn != NULL_RTX);
7373 emit_insn (insn);
7376 else
7378 emit_move_insn (base_tmp, op1);
7379 emit_insn (gen_add2_insn (base_tmp, element_offset));
7382 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7386 else
7388 emit_move_insn (base_tmp, addr);
7389 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7392 /* If we have a PLUS, we need to see whether the particular register class
7393 allows for D-FORM or X-FORM addressing. */
7394 if (GET_CODE (new_addr) == PLUS)
7396 rtx op1 = XEXP (new_addr, 1);
7397 addr_mask_type addr_mask;
7398 int scalar_regno = regno_or_subregno (scalar_reg);
7400 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7401 if (INT_REGNO_P (scalar_regno))
7402 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7404 else if (FP_REGNO_P (scalar_regno))
7405 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7407 else if (ALTIVEC_REGNO_P (scalar_regno))
7408 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7410 else
7411 gcc_unreachable ();
7413 if (REG_P (op1) || SUBREG_P (op1))
7414 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7415 else
7416 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7419 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7420 valid_addr_p = true;
7422 else
7423 valid_addr_p = false;
7425 if (!valid_addr_p)
7427 emit_move_insn (base_tmp, new_addr);
7428 new_addr = base_tmp;
7431 return change_address (mem, scalar_mode, new_addr);
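/* Illustrative example (added for exposition): extracting element 3 of a
V4SImode vector stored at (plus (reg 9) (const_int 32)) gives an element
offset of 3 * 4 = 12, which rs6000_adjust_vec_address folds into the
displacement to form (plus (reg 9) (const_int 44)) when the register
class of SCALAR_REG supports offset addressing; otherwise the sum is
computed into BASE_TMP and an indexed or indirect address is used. */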
7434 /* Split a variable vec_extract operation into the component instructions. */
7436 void
7437 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7438 rtx tmp_altivec)
7440 machine_mode mode = GET_MODE (src);
7441 machine_mode scalar_mode = GET_MODE (dest);
7442 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7443 int byte_shift = exact_log2 (scalar_size);
7445 gcc_assert (byte_shift >= 0);
7447 /* If we are given a memory address, optimize to load just the element. We
7448 don't have to adjust the vector element number on little endian
7449 systems. */
7450 if (MEM_P (src))
7452 gcc_assert (REG_P (tmp_gpr));
7453 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7454 tmp_gpr, scalar_mode));
7455 return;
7458 else if (REG_P (src) || SUBREG_P (src))
7460 int bit_shift = byte_shift + 3;
7461 rtx element2;
7463 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7465 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7466 an XOR, otherwise we need to subtract. The shift amount is chosen so
7467 VSLO will shift the element into the upper position (adding 3 converts
7468 a byte shift into a bit shift). */
7469 if (scalar_size == 8)
7471 if (!VECTOR_ELT_ORDER_BIG)
7473 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7474 element2 = tmp_gpr;
7476 else
7477 element2 = element;
7479 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7480 bit. */
7481 emit_insn (gen_rtx_SET (tmp_gpr,
7482 gen_rtx_AND (DImode,
7483 gen_rtx_ASHIFT (DImode,
7484 element2,
7485 GEN_INT (6)),
7486 GEN_INT (64))));
7488 else
7490 if (!VECTOR_ELT_ORDER_BIG)
7492 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7494 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7495 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7496 element2 = tmp_gpr;
7498 else
7499 element2 = element;
7501 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7504 /* Get the value into the lower byte of the Altivec register where VSLO
7505 expects it. */
7506 if (TARGET_P9_VECTOR)
7507 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7508 else if (can_create_pseudo_p ())
7509 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7510 else
7512 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7513 emit_move_insn (tmp_di, tmp_gpr);
7514 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7517 /* Do the VSLO to get the value into the final location. */
7518 switch (mode)
7520 case V2DFmode:
7521 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7522 return;
7524 case V2DImode:
7525 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7526 return;
7528 case V4SFmode:
7530 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7531 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7532 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7533 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7534 tmp_altivec));
7536 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7537 return;
7540 case V4SImode:
7541 case V8HImode:
7542 case V16QImode:
7544 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7545 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7546 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7547 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7548 tmp_altivec));
7549 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7550 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7551 GEN_INT (64 - (8 * scalar_size))));
7552 return;
7555 default:
7556 gcc_unreachable ();
7559 return;
7561 else
7562 gcc_unreachable ();
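/* Illustrative example (added for exposition): a variable extract from a
V8HImode vector on little endian maps ELEMENT to 7 - ELEMENT, shifts it
left by 4 to form a bit count, splats that count into TMP_ALTIVEC, and
uses VSLO to move the selected halfword into the leftmost doubleword,
from which the final right shift recovers the 16-bit value. */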
7565 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7566 two SImode values. */
7568 static void
7569 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7571 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7573 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7575 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7576 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7578 emit_move_insn (dest, GEN_INT (const1 | const2));
7579 return;
7582 /* Put si1 into the upper 32 bits of dest. */
7583 if (CONST_INT_P (si1))
7584 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7585 else
7587 /* Generate RLDIC. */
7588 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7589 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7590 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7591 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7592 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7593 emit_insn (gen_rtx_SET (dest, and_rtx));
7596 /* Put si2 into the temporary. */
7597 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7598 if (CONST_INT_P (si2))
7599 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7600 else
7601 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7603 /* Combine the two parts. */
7604 emit_insn (gen_iordi3 (dest, dest, tmp));
7605 return;
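/* Worked example (added for exposition): with SI1 == 0x11111111 and
SI2 == 0x22222222, both constant, the function emits a single move of
the DImode constant 0x1111111122222222; if either part is in a
register, the shift-and-mask / zero-extend / IOR sequence above is
used instead. */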
7608 /* Split a V4SI initialization. */
7610 void
7611 rs6000_split_v4si_init (rtx operands[])
7613 rtx dest = operands[0];
7615 /* Destination is a GPR; build up the two DImode parts in place. */
7616 if (REG_P (dest) || SUBREG_P (dest))
7618 int d_regno = regno_or_subregno (dest);
7619 rtx scalar1 = operands[1];
7620 rtx scalar2 = operands[2];
7621 rtx scalar3 = operands[3];
7622 rtx scalar4 = operands[4];
7623 rtx tmp1 = operands[5];
7624 rtx tmp2 = operands[6];
7626 /* Even though we only need one temporary (plus the destination, which
7627 has an early clobber constraint), try to use two temporaries, one for
7628 each double word created. That way the 2nd insn scheduling pass can
7629 rearrange things so the two parts are done in parallel. */
7630 if (BYTES_BIG_ENDIAN)
7632 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7633 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7634 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7635 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7637 else
7639 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7640 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7641 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7642 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7643 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7645 return;
7648 else
7649 gcc_unreachable ();
7652 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7654 bool
7655 invalid_e500_subreg (rtx op, machine_mode mode)
7657 if (TARGET_E500_DOUBLE)
7659 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7660 subreg:TI and reg:TF. Decimal float modes are like integer
7661 modes (only low part of each register used) for this
7662 purpose. */
7663 if (GET_CODE (op) == SUBREG
7664 && (mode == SImode || mode == DImode || mode == TImode
7665 || mode == DDmode || mode == TDmode || mode == PTImode)
7666 && REG_P (SUBREG_REG (op))
7667 && (GET_MODE (SUBREG_REG (op)) == DFmode
7668 || GET_MODE (SUBREG_REG (op)) == TFmode
7669 || GET_MODE (SUBREG_REG (op)) == IFmode
7670 || GET_MODE (SUBREG_REG (op)) == KFmode))
7671 return true;
7673 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7674 reg:TI. */
7675 if (GET_CODE (op) == SUBREG
7676 && (mode == DFmode || mode == TFmode || mode == IFmode
7677 || mode == KFmode)
7678 && REG_P (SUBREG_REG (op))
7679 && (GET_MODE (SUBREG_REG (op)) == DImode
7680 || GET_MODE (SUBREG_REG (op)) == TImode
7681 || GET_MODE (SUBREG_REG (op)) == PTImode
7682 || GET_MODE (SUBREG_REG (op)) == DDmode
7683 || GET_MODE (SUBREG_REG (op)) == TDmode))
7684 return true;
7687 if (TARGET_SPE
7688 && GET_CODE (op) == SUBREG
7689 && mode == SImode
7690 && REG_P (SUBREG_REG (op))
7691 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7692 return true;
7694 return false;
7697 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7698 selects whether the alignment is ABI-mandated, optional, or
7699 both ABI-mandated and optional alignment. */
7701 unsigned int
7702 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7704 if (how != align_opt)
7706 if (TREE_CODE (type) == VECTOR_TYPE)
7708 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7709 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7711 if (align < 64)
7712 align = 64;
7714 else if (align < 128)
7715 align = 128;
7717 else if (TARGET_E500_DOUBLE
7718 && TREE_CODE (type) == REAL_TYPE
7719 && TYPE_MODE (type) == DFmode)
7721 if (align < 64)
7722 align = 64;
7726 if (how != align_abi)
7728 if (TREE_CODE (type) == ARRAY_TYPE
7729 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7731 if (align < BITS_PER_WORD)
7732 align = BITS_PER_WORD;
7736 return align;
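/* Illustrative example (added for exposition): for an AltiVec vector
type with an existing alignment of 32 bits, the ABI path above raises
the alignment to 128 bits, while an array of QImode elements is raised
to BITS_PER_WORD only on the optional-alignment path. */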
7739 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7741 bool
7742 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7744 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7746 if (computed != 128)
7748 static bool warned;
7749 if (!warned && warn_psabi)
7751 warned = true;
7752 inform (input_location,
7753 "the layout of aggregates containing vectors with"
7754 " %d-byte alignment has changed in GCC 5",
7755 computed / BITS_PER_UNIT);
7758 /* In current GCC there is no special case. */
7759 return false;
7762 return false;
7765 /* AIX increases natural record alignment to doubleword if the first
7766 field is an FP double while the FP fields remain word aligned. */
7768 unsigned int
7769 rs6000_special_round_type_align (tree type, unsigned int computed,
7770 unsigned int specified)
7772 unsigned int align = MAX (computed, specified);
7773 tree field = TYPE_FIELDS (type);
7775 /* Skip all non-field decls. */
7776 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7777 field = DECL_CHAIN (field);
7779 if (field != NULL && field != type)
7781 type = TREE_TYPE (field);
7782 while (TREE_CODE (type) == ARRAY_TYPE)
7783 type = TREE_TYPE (type);
7785 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7786 align = MAX (align, 64);
7789 return align;
7792 /* Darwin increases record alignment to the natural alignment of
7793 the first field. */
7795 unsigned int
7796 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7797 unsigned int specified)
7799 unsigned int align = MAX (computed, specified);
7801 if (TYPE_PACKED (type))
7802 return align;
7804 /* Find the first field, looking down into aggregates. */
7805 do {
7806 tree field = TYPE_FIELDS (type);
7807 /* Skip all non-field decls. */
7808 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7809 field = DECL_CHAIN (field);
7810 if (! field)
7811 break;
7812 /* A packed field does not contribute any extra alignment. */
7813 if (DECL_PACKED (field))
7814 return align;
7815 type = TREE_TYPE (field);
7816 while (TREE_CODE (type) == ARRAY_TYPE)
7817 type = TREE_TYPE (type);
7818 } while (AGGREGATE_TYPE_P (type));
7820 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7821 align = MAX (align, TYPE_ALIGN (type));
7823 return align;
7826 /* Return 1 for an operand in small memory on V.4/eabi. */
7828 int
7829 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7830 machine_mode mode ATTRIBUTE_UNUSED)
7832 #if TARGET_ELF
7833 rtx sym_ref;
7835 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7836 return 0;
7838 if (DEFAULT_ABI != ABI_V4)
7839 return 0;
7841 /* Vector and float memory instructions have a limited offset on the
7842 SPE, so using a vector or float variable directly as an operand is
7843 not useful. */
7844 if (TARGET_SPE
7845 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7846 return 0;
7848 if (GET_CODE (op) == SYMBOL_REF)
7849 sym_ref = op;
7851 else if (GET_CODE (op) != CONST
7852 || GET_CODE (XEXP (op, 0)) != PLUS
7853 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7854 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7855 return 0;
7857 else
7859 rtx sum = XEXP (op, 0);
7860 HOST_WIDE_INT summand;
7862 /* We have to be careful here, because it is the referenced address
7863 that must be 32k from _SDA_BASE_, not just the symbol. */
7864 summand = INTVAL (XEXP (sum, 1));
7865 if (summand < 0 || summand > g_switch_value)
7866 return 0;
7868 sym_ref = XEXP (sum, 0);
7871 return SYMBOL_REF_SMALL_P (sym_ref);
7872 #else
7873 return 0;
7874 #endif
7877 /* Return true if either operand is a general purpose register. */
7879 bool
7880 gpr_or_gpr_p (rtx op0, rtx op1)
7882 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7883 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7886 /* Return true if this is a direct move operation between GPR registers and
7887 floating point/VSX registers. */
7889 bool
7890 direct_move_p (rtx op0, rtx op1)
7892 int regno0, regno1;
7894 if (!REG_P (op0) || !REG_P (op1))
7895 return false;
7897 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7898 return false;
7900 regno0 = REGNO (op0);
7901 regno1 = REGNO (op1);
7902 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7903 return false;
7905 if (INT_REGNO_P (regno0))
7906 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7908 else if (INT_REGNO_P (regno1))
7910 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7911 return true;
7913 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7914 return true;
7917 return false;
7920 /* Return true if the OFFSET is valid for the quad address instructions that
7921 use d-form (register + offset) addressing. */
7923 static inline bool
7924 quad_address_offset_p (HOST_WIDE_INT offset)
7926 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
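/* For example (added for exposition): offset 48 satisfies the check
above -- it lies in [-32768, 32767] and its low four bits are zero --
while offset 50 is rejected because it is not a multiple of 16. */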
7929 /* Return true if the ADDR is an acceptable address for a quad memory
7930 operation of mode MODE (either LQ/STQ for general purpose registers, or
7931 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
7932 address must be strictly valid, i.e. if pseudo registers are not
7933 acceptable as base registers. */
7935 bool
7936 quad_address_p (rtx addr, machine_mode mode, bool strict)
7938 rtx op0, op1;
7940 if (GET_MODE_SIZE (mode) != 16)
7941 return false;
7943 if (legitimate_indirect_address_p (addr, strict))
7944 return true;
7946 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7947 return false;
7949 if (GET_CODE (addr) != PLUS)
7950 return false;
7952 op0 = XEXP (addr, 0);
7953 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7954 return false;
7956 op1 = XEXP (addr, 1);
7957 if (!CONST_INT_P (op1))
7958 return false;
7960 return quad_address_offset_p (INTVAL (op1));
7963 /* Return true if this is a load or store quad operation. This function does
7964 not handle the atomic quad memory instructions. */
7966 bool
7967 quad_load_store_p (rtx op0, rtx op1)
7969 bool ret;
7971 if (!TARGET_QUAD_MEMORY)
7972 ret = false;
7974 else if (REG_P (op0) && MEM_P (op1))
7975 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7976 && quad_memory_operand (op1, GET_MODE (op1))
7977 && !reg_overlap_mentioned_p (op0, op1));
7979 else if (MEM_P (op0) && REG_P (op1))
7980 ret = (quad_memory_operand (op0, GET_MODE (op0))
7981 && quad_int_reg_operand (op1, GET_MODE (op1)));
7983 else
7984 ret = false;
7986 if (TARGET_DEBUG_ADDR)
7988 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7989 ret ? "true" : "false");
7990 debug_rtx (gen_rtx_SET (op0, op1));
7993 return ret;
7996 /* Given an address, return a constant offset term if one exists. */
7998 static rtx
7999 address_offset (rtx op)
8001 if (GET_CODE (op) == PRE_INC
8002 || GET_CODE (op) == PRE_DEC)
8003 op = XEXP (op, 0);
8004 else if (GET_CODE (op) == PRE_MODIFY
8005 || GET_CODE (op) == LO_SUM)
8006 op = XEXP (op, 1);
8008 if (GET_CODE (op) == CONST)
8009 op = XEXP (op, 0);
8011 if (GET_CODE (op) == PLUS)
8012 op = XEXP (op, 1);
8014 if (CONST_INT_P (op))
8015 return op;
8017 return NULL_RTX;
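/* Illustrative examples (added for exposition): for
(plus (reg 9) (const_int 16)) this returns (const_int 16); for
(lo_sum (reg 9) (const (plus (symbol_ref "x") (const_int 8)))) it
returns (const_int 8); and it returns NULL_RTX when no constant term
is present. */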
8020 /* Return true if the MEM operand is a memory operand suitable for use
8021 with a (full width, possibly multiple) gpr load/store. On
8022 powerpc64 this means the offset must be divisible by 4.
8023 Implements 'Y' constraint.
8025 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8026 a constraint function we know the operand has satisfied a suitable
8027 memory predicate. Also accept some odd rtl generated by reload
8028 (see rs6000_legitimize_reload_address for various forms). It is
8029 important that reload rtl be accepted by appropriate constraints
8030 but not by the operand predicate.
8032 Offsetting a lo_sum should not be allowed, except where we know by
8033 alignment that a 32k boundary is not crossed, but see the ???
8034 comment in rs6000_legitimize_reload_address. Note that by
8035 "offsetting" here we mean a further offset to access parts of the
8036 MEM. It's fine to have a lo_sum where the inner address is offset
8037 from a sym, since the same sym+offset will appear in the high part
8038 of the address calculation. */
8040 bool
8041 mem_operand_gpr (rtx op, machine_mode mode)
8043 unsigned HOST_WIDE_INT offset;
8044 int extra;
8045 rtx addr = XEXP (op, 0);
8047 op = address_offset (addr);
8048 if (op == NULL_RTX)
8049 return true;
8051 offset = INTVAL (op);
8052 if (TARGET_POWERPC64 && (offset & 3) != 0)
8053 return false;
8055 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8056 if (extra < 0)
8057 extra = 0;
8059 if (GET_CODE (addr) == LO_SUM)
8060 /* For lo_sum addresses, we must allow any offset except one that
8061 causes a wrap, so test only the low 16 bits. */
8062 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8064 return offset + 0x8000 < 0x10000u - extra;
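/* Worked example (added for exposition): for a DImode access on
powerpc64, extra == 0 and the test above reduces to
-0x8000 <= offset < 0x8000 with the low two bits clear; for a 16-byte
PTImode access, extra == 8, so the final word at offset + 8 must also
be reachable by a 16-bit displacement. */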
8067 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8068 enforce an offset divisible by 4 even for 32-bit. */
8070 bool
8071 mem_operand_ds_form (rtx op, machine_mode mode)
8073 unsigned HOST_WIDE_INT offset;
8074 int extra;
8075 rtx addr = XEXP (op, 0);
8077 if (!offsettable_address_p (false, mode, addr))
8078 return false;
8080 op = address_offset (addr);
8081 if (op == NULL_RTX)
8082 return true;
8084 offset = INTVAL (op);
8085 if ((offset & 3) != 0)
8086 return false;
8088 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8089 if (extra < 0)
8090 extra = 0;
8092 if (GET_CODE (addr) == LO_SUM)
8093 /* For lo_sum addresses, we must allow any offset except one that
8094 causes a wrap, so test only the low 16 bits. */
8095 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8097 return offset + 0x8000 < 0x10000u - extra;
8100 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8102 static bool
8103 reg_offset_addressing_ok_p (machine_mode mode)
8105 switch (mode)
8107 case V16QImode:
8108 case V8HImode:
8109 case V4SFmode:
8110 case V4SImode:
8111 case V2DFmode:
8112 case V2DImode:
8113 case V1TImode:
8114 case TImode:
8115 case TFmode:
8116 case KFmode:
8117 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8118 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8119 a vector mode, if we want to use the VSX registers to move it around,
8120 we need to restrict ourselves to reg+reg addressing. Similarly for
8121 IEEE 128-bit floating point that is passed in a single vector
8122 register. */
8123 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8124 return mode_supports_vsx_dform_quad (mode);
8125 break;
8127 case V4HImode:
8128 case V2SImode:
8129 case V1DImode:
8130 case V2SFmode:
8131 /* Paired vector modes. Only reg+reg addressing is valid. */
8132 if (TARGET_PAIRED_FLOAT)
8133 return false;
8134 break;
8136 case SDmode:
8137 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8138 addressing for the LFIWZX and STFIWX instructions. */
8139 if (TARGET_NO_SDMODE_STACK)
8140 return false;
8141 break;
8143 default:
8144 break;
8147 return true;
8150 static bool
8151 virtual_stack_registers_memory_p (rtx op)
8153 int regnum;
8155 if (GET_CODE (op) == REG)
8156 regnum = REGNO (op);
8158 else if (GET_CODE (op) == PLUS
8159 && GET_CODE (XEXP (op, 0)) == REG
8160 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8161 regnum = REGNO (XEXP (op, 0));
8163 else
8164 return false;
8166 return (regnum >= FIRST_VIRTUAL_REGISTER
8167 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8170 /* Return true if a MODE-sized memory access to OP plus OFFSET
8171 is known not to straddle a 32k boundary. This function is used
8172 to determine whether -mcmodel=medium code can use TOC pointer
8173 relative addressing for OP. This means the alignment of the TOC
8174 pointer must also be taken into account, and unfortunately that is
8175 only 8 bytes. */
8177 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8178 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8179 #endif
8181 static bool
8182 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8183 machine_mode mode)
8185 tree decl;
8186 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8188 if (GET_CODE (op) != SYMBOL_REF)
8189 return false;
8191 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8192 SYMBOL_REF. */
8193 if (mode_supports_vsx_dform_quad (mode))
8194 return false;
8196 dsize = GET_MODE_SIZE (mode);
8197 decl = SYMBOL_REF_DECL (op);
8198 if (!decl)
8200 if (dsize == 0)
8201 return false;
8203 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8204 replacing memory addresses with an anchor plus offset. We
8205 could find the decl by rummaging around in the block->objects
8206 VEC for the given offset but that seems like too much work. */
8207 dalign = BITS_PER_UNIT;
8208 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8209 && SYMBOL_REF_ANCHOR_P (op)
8210 && SYMBOL_REF_BLOCK (op) != NULL)
8212 struct object_block *block = SYMBOL_REF_BLOCK (op);
8214 dalign = block->alignment;
8215 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8217 else if (CONSTANT_POOL_ADDRESS_P (op))
8219 /* It would be nice to have get_pool_align (). */
8220 machine_mode cmode = get_pool_mode (op);
8222 dalign = GET_MODE_ALIGNMENT (cmode);
8225 else if (DECL_P (decl))
8227 dalign = DECL_ALIGN (decl);
8229 if (dsize == 0)
8231 /* Allow BLKmode when the entire object is known to not
8232 cross a 32k boundary. */
8233 if (!DECL_SIZE_UNIT (decl))
8234 return false;
8236 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8237 return false;
8239 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8240 if (dsize > 32768)
8241 return false;
8243 dalign /= BITS_PER_UNIT;
8244 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8245 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8246 return dalign >= dsize;
8249 else
8250 gcc_unreachable ();
8252 /* Find how many bits of the alignment we know for this access. */
8253 dalign /= BITS_PER_UNIT;
8254 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8255 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8256 mask = dalign - 1;
8257 lsb = offset & -offset;
8258 mask &= lsb - 1;
8259 dalign = mask + 1;
8261 return dalign >= dsize;
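/* Worked example (added for exposition): with the TOC pointer alignment
capped at 8 and OFFSET == 20, lsb == 4, so only 4 bytes of alignment
are provable and a 16-byte access might straddle a 32k boundary; the
function therefore returns false for dsize 16. */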
8264 static bool
8265 constant_pool_expr_p (rtx op)
8267 rtx base, offset;
8269 split_const (op, &base, &offset);
8270 return (GET_CODE (base) == SYMBOL_REF
8271 && CONSTANT_POOL_ADDRESS_P (base)
8272 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8275 static const_rtx tocrel_base, tocrel_offset;
8277 /* Return true if OP is a toc pointer relative address (the output
8278 of create_TOC_reference). If STRICT, do not match non-split
8279 -mcmodel=large/medium toc pointer relative addresses. */
8281 bool
8282 toc_relative_expr_p (const_rtx op, bool strict)
8284 if (!TARGET_TOC)
8285 return false;
8287 if (TARGET_CMODEL != CMODEL_SMALL)
8289 /* When strict, ensure we have everything tidy. */
8290 if (strict
8291 && !(GET_CODE (op) == LO_SUM
8292 && REG_P (XEXP (op, 0))
8293 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8294 return false;
8296 /* When not strict, allow non-split TOC addresses and also allow
8297 (lo_sum (high ..)) TOC addresses created during reload. */
8298 if (GET_CODE (op) == LO_SUM)
8299 op = XEXP (op, 1);
8302 tocrel_base = op;
8303 tocrel_offset = const0_rtx;
8304 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8306 tocrel_base = XEXP (op, 0);
8307 tocrel_offset = XEXP (op, 1);
8310 return (GET_CODE (tocrel_base) == UNSPEC
8311 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8314 /* Return true if X is a constant pool address, and also for cmodel=medium
8315 if X is a toc-relative address known to be offsettable within MODE. */
8317 bool
8318 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8319 bool strict)
8321 return (toc_relative_expr_p (x, strict)
8322 && (TARGET_CMODEL != CMODEL_MEDIUM
8323 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8324 || mode == QImode
8325 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8326 INTVAL (tocrel_offset), mode)));
8329 static bool
8330 legitimate_small_data_p (machine_mode mode, rtx x)
8332 return (DEFAULT_ABI == ABI_V4
8333 && !flag_pic && !TARGET_TOC
8334 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8335 && small_data_operand (x, mode));
8338 /* SPE offset addressing is limited to 5-bits worth of double words. */
8339 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8341 bool
8342 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8343 bool strict, bool worst_case)
8345 unsigned HOST_WIDE_INT offset;
8346 unsigned int extra;
8348 if (GET_CODE (x) != PLUS)
8349 return false;
8350 if (!REG_P (XEXP (x, 0)))
8351 return false;
8352 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8353 return false;
8354 if (mode_supports_vsx_dform_quad (mode))
8355 return quad_address_p (x, mode, strict);
8356 if (!reg_offset_addressing_ok_p (mode))
8357 return virtual_stack_registers_memory_p (x);
8358 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8359 return true;
8360 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8361 return false;
8363 offset = INTVAL (XEXP (x, 1));
8364 extra = 0;
8365 switch (mode)
8367 case V4HImode:
8368 case V2SImode:
8369 case V1DImode:
8370 case V2SFmode:
8371 /* SPE vector modes. */
8372 return SPE_CONST_OFFSET_OK (offset);
8374 case DFmode:
8375 case DDmode:
8376 case DImode:
8377 /* On e500v2, we may have:
8379 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8381 Which gets addressed with evldd instructions. */
8382 if (TARGET_E500_DOUBLE)
8383 return SPE_CONST_OFFSET_OK (offset);
8385 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8386 addressing. */
8387 if (VECTOR_MEM_VSX_P (mode))
8388 return false;
8390 if (!worst_case)
8391 break;
8392 if (!TARGET_POWERPC64)
8393 extra = 4;
8394 else if (offset & 3)
8395 return false;
8396 break;
8398 case TFmode:
8399 case IFmode:
8400 case KFmode:
8401 if (TARGET_E500_DOUBLE)
8402 return (SPE_CONST_OFFSET_OK (offset)
8403 && SPE_CONST_OFFSET_OK (offset + 8));
8404 /* fall through */
8406 case TDmode:
8407 case TImode:
8408 case PTImode:
8409 extra = 8;
8410 if (!worst_case)
8411 break;
8412 if (!TARGET_POWERPC64)
8413 extra = 12;
8414 else if (offset & 3)
8415 return false;
8416 break;
8418 default:
8419 break;
8422 offset += 0x8000;
8423 return offset < 0x10000 - extra;
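/* Worked example (added for exposition): for TFmode on a 32-bit target
with WORST_CASE set, extra == 12, so the offset must satisfy
-0x8000 <= offset < 0x8000 - 12, keeping the last word of the
four-word access reachable with a 16-bit displacement. */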
8426 bool
8427 legitimate_indexed_address_p (rtx x, int strict)
8429 rtx op0, op1;
8431 if (GET_CODE (x) != PLUS)
8432 return false;
8434 op0 = XEXP (x, 0);
8435 op1 = XEXP (x, 1);
8437 /* Recognize the rtl generated by reload which we know will later be
8438 replaced with proper base and index regs. */
8439 if (!strict
8440 && reload_in_progress
8441 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8442 && REG_P (op1))
8443 return true;
8445 return (REG_P (op0) && REG_P (op1)
8446 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8447 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8448 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8449 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8452 bool
8453 avoiding_indexed_address_p (machine_mode mode)
8455 /* Avoid indexed addressing for modes that have non-indexed
8456 load/store instruction forms. */
8457 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8460 bool
8461 legitimate_indirect_address_p (rtx x, int strict)
8463 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8466 bool
8467 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8469 if (!TARGET_MACHO || !flag_pic
8470 || mode != SImode || GET_CODE (x) != MEM)
8471 return false;
8472 x = XEXP (x, 0);
8474 if (GET_CODE (x) != LO_SUM)
8475 return false;
8476 if (GET_CODE (XEXP (x, 0)) != REG)
8477 return false;
8478 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8479 return false;
8480 x = XEXP (x, 1);
8482 return CONSTANT_P (x);
8485 static bool
8486 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8488 if (GET_CODE (x) != LO_SUM)
8489 return false;
8490 if (GET_CODE (XEXP (x, 0)) != REG)
8491 return false;
8492 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8493 return false;
8494 /* Quad word addresses are restricted; we can't use LO_SUM. */
8495 if (mode_supports_vsx_dform_quad (mode))
8496 return false;
8497 /* Restrict addressing for DI because of our SUBREG hackery. */
8498 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8499 return false;
8500 x = XEXP (x, 1);
8502 if (TARGET_ELF || TARGET_MACHO)
8504 bool large_toc_ok;
8506 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8507 return false;
8508 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that is usually
8509 called via push_reload from the reload pass. LEGITIMIZE_RELOAD_ADDRESS
8510 recognizes some LO_SUM addresses as valid although this
8511 function says the opposite. In most cases LRA can generate correct
8512 code for address reloads through its own transformations; only
8513 some LO_SUM cases elude it. So we need to add code here,
8514 analogous to the LO_SUM handling in rs6000_legitimize_reload_address,
8515 saying that some addresses are still valid. */
8516 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8517 && small_toc_ref (x, VOIDmode));
8518 if (TARGET_TOC && ! large_toc_ok)
8519 return false;
8520 if (GET_MODE_NUNITS (mode) != 1)
8521 return false;
8522 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8523 && !(/* ??? Assume floating point reg based on mode? */
8524 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8525 && (mode == DFmode || mode == DDmode)))
8526 return false;
8528 return CONSTANT_P (x) || large_toc_ok;
8531 return false;
8535 /* Try machine-dependent ways of modifying an illegitimate address
8536 to be legitimate. If we find one, return the new, valid address.
8537 This is used from only one place: `memory_address' in explow.c.
8539 OLDX is the address as it was before break_out_memory_refs was
8540 called. In some cases it is useful to look at this to decide what
8541 needs to be done.
8543 It is always safe for this function to do nothing. It exists to
8544 recognize opportunities to optimize the output.
8546 On RS/6000, first check for the sum of a register with a constant
8547 integer that is out of range. If so, generate code to add the
8548 constant with the low-order 16 bits masked to the register and force
8549 this result into another register (this can be done with `cau').
8550 Then generate an address of REG+(CONST&0xffff), allowing for the
8551 possibility of bit 16 being a one.
8553 Then check for the sum of a register and something not constant, try to
8554 load the other things into a register and return the sum. */
8556 static rtx
8557 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8558 machine_mode mode)
8560 unsigned int extra;
8562 if (!reg_offset_addressing_ok_p (mode)
8563 || mode_supports_vsx_dform_quad (mode))
8565 if (virtual_stack_registers_memory_p (x))
8566 return x;
8568 /* In theory we should not be seeing addresses of the form reg+0,
8569 but just in case it is generated, optimize it away. */
8570 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8571 return force_reg (Pmode, XEXP (x, 0));
8573 /* For TImode with load/store quad, restrict addresses to just a single
8574 pointer, so it works with both GPRs and VSX registers. */
8575 /* Make sure both operands are registers. */
8576 else if (GET_CODE (x) == PLUS
8577 && (mode != TImode || !TARGET_VSX_TIMODE))
8578 return gen_rtx_PLUS (Pmode,
8579 force_reg (Pmode, XEXP (x, 0)),
8580 force_reg (Pmode, XEXP (x, 1)));
8581 else
8582 return force_reg (Pmode, x);
8584 if (GET_CODE (x) == SYMBOL_REF)
8586 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8587 if (model != 0)
8588 return rs6000_legitimize_tls_address (x, model);
8591 extra = 0;
8592 switch (mode)
8594 case TFmode:
8595 case TDmode:
8596 case TImode:
8597 case PTImode:
8598 case IFmode:
8599 case KFmode:
8600 /* As in legitimate_offset_address_p we do not assume
8601 worst-case. The mode here is just a hint as to the registers
8602 used. A TImode is usually in gprs, but may actually be in
8603 fprs. Leave worst-case scenario for reload to handle via
8604 insn constraints. PTImode is only GPRs. */
8605 extra = 8;
8606 break;
8607 default:
8608 break;
8611 if (GET_CODE (x) == PLUS
8612 && GET_CODE (XEXP (x, 0)) == REG
8613 && GET_CODE (XEXP (x, 1)) == CONST_INT
8614 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8615 >= 0x10000 - extra)
8616 && !(SPE_VECTOR_MODE (mode)
8617 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8619 HOST_WIDE_INT high_int, low_int;
8620 rtx sum;
8621 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8622 if (low_int >= 0x8000 - extra)
8623 low_int = 0;
8624 high_int = INTVAL (XEXP (x, 1)) - low_int;
8625 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8626 GEN_INT (high_int)), 0);
8627 return plus_constant (Pmode, sum, low_int);
8629 else if (GET_CODE (x) == PLUS
8630 && GET_CODE (XEXP (x, 0)) == REG
8631 && GET_CODE (XEXP (x, 1)) != CONST_INT
8632 && GET_MODE_NUNITS (mode) == 1
8633 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8634 || (/* ??? Assume floating point reg based on mode? */
8635 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8636 && (mode == DFmode || mode == DDmode)))
8637 && !avoiding_indexed_address_p (mode))
8639 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8640 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8642 else if (SPE_VECTOR_MODE (mode)
8643 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8645 if (mode == DImode)
8646 return x;
8647 /* We accept [reg + reg] and [reg + OFFSET]. */
8649 if (GET_CODE (x) == PLUS)
8651 rtx op1 = XEXP (x, 0);
8652 rtx op2 = XEXP (x, 1);
8653 rtx y;
8655 op1 = force_reg (Pmode, op1);
8657 if (GET_CODE (op2) != REG
8658 && (GET_CODE (op2) != CONST_INT
8659 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8660 || (GET_MODE_SIZE (mode) > 8
8661 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8662 op2 = force_reg (Pmode, op2);
8664 /* We can't always do [reg + reg] for these, because [reg +
8665 reg + offset] is not a legitimate addressing mode. */
8666 y = gen_rtx_PLUS (Pmode, op1, op2);
8668 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8669 return force_reg (Pmode, y);
8670 else
8671 return y;
8674 return force_reg (Pmode, x);
8676 else if ((TARGET_ELF
8677 #if TARGET_MACHO
8678 || !MACHO_DYNAMIC_NO_PIC_P
8679 #endif
8681 && TARGET_32BIT
8682 && TARGET_NO_TOC
8683 && ! flag_pic
8684 && GET_CODE (x) != CONST_INT
8685 && GET_CODE (x) != CONST_WIDE_INT
8686 && GET_CODE (x) != CONST_DOUBLE
8687 && CONSTANT_P (x)
8688 && GET_MODE_NUNITS (mode) == 1
8689 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8690 || (/* ??? Assume floating point reg based on mode? */
8691 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8692 && (mode == DFmode || mode == DDmode))))
8694 rtx reg = gen_reg_rtx (Pmode);
8695 if (TARGET_ELF)
8696 emit_insn (gen_elf_high (reg, x));
8697 else
8698 emit_insn (gen_macho_high (reg, x));
8699 return gen_rtx_LO_SUM (Pmode, reg, x);
8701 else if (TARGET_TOC
8702 && GET_CODE (x) == SYMBOL_REF
8703 && constant_pool_expr_p (x)
8704 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8705 return create_TOC_reference (x, NULL_RTX);
8706 else
8707 return x;
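/* Worked example (added for exposition): legitimizing
(plus (reg 9) (const_int 0x12348)) for SImode splits the constant into
high_int == 0x10000, added with ADDIS, and low_int == 0x2348, kept as
the displacement, so one extra insn suffices instead of materializing
the full constant. */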
8710 /* Debug version of rs6000_legitimize_address. */
8711 static rtx
8712 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8714 rtx ret;
8715 rtx_insn *insns;
8717 start_sequence ();
8718 ret = rs6000_legitimize_address (x, oldx, mode);
8719 insns = get_insns ();
8720 end_sequence ();
8722 if (ret != x)
8724 fprintf (stderr,
8725 "\nrs6000_legitimize_address: mode %s, old code %s, "
8726 "new code %s, modified\n",
8727 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8728 GET_RTX_NAME (GET_CODE (ret)));
8730 fprintf (stderr, "Original address:\n");
8731 debug_rtx (x);
8733 fprintf (stderr, "oldx:\n");
8734 debug_rtx (oldx);
8736 fprintf (stderr, "New address:\n");
8737 debug_rtx (ret);
8739 if (insns)
8741 fprintf (stderr, "Insns added:\n");
8742 debug_rtx_list (insns, 20);
8745 else
8747 fprintf (stderr,
8748 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8749 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8751 debug_rtx (x);
8754 if (insns)
8755 emit_insn (insns);
8757 return ret;
8760 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8761 We need to emit DTP-relative relocations. */
8763 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8764 static void
8765 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8767 switch (size)
8769 case 4:
8770 fputs ("\t.long\t", file);
8771 break;
8772 case 8:
8773 fputs (DOUBLE_INT_ASM_OP, file);
8774 break;
8775 default:
8776 gcc_unreachable ();
8778 output_addr_const (file, x);
8779 if (TARGET_ELF)
8780 fputs ("@dtprel+0x8000", file);
8781 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8783 switch (SYMBOL_REF_TLS_MODEL (x))
8785 case 0:
8786 break;
8787 case TLS_MODEL_LOCAL_EXEC:
8788 fputs ("@le", file);
8789 break;
8790 case TLS_MODEL_INITIAL_EXEC:
8791 fputs ("@ie", file);
8792 break;
8793 case TLS_MODEL_GLOBAL_DYNAMIC:
8794 case TLS_MODEL_LOCAL_DYNAMIC:
8795 fputs ("@m", file);
8796 break;
8797 default:
8798 gcc_unreachable ();
8803 /* Return true if X is a symbol that refers to real (rather than emulated)
8804 TLS. */
8806 static bool
8807 rs6000_real_tls_symbol_ref_p (rtx x)
8809 return (GET_CODE (x) == SYMBOL_REF
8810 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8813 /* In the name of slightly smaller debug output, and to cater to
8814 general assembler lossage, recognize various UNSPEC sequences
8815 and turn them back into a direct symbol reference. */
8817 static rtx
8818 rs6000_delegitimize_address (rtx orig_x)
8820 rtx x, y, offset;
8822 orig_x = delegitimize_mem_from_attrs (orig_x);
8823 x = orig_x;
8824 if (MEM_P (x))
8825 x = XEXP (x, 0);
8827 y = x;
8828 if (TARGET_CMODEL != CMODEL_SMALL
8829 && GET_CODE (y) == LO_SUM)
8830 y = XEXP (y, 1);
8832 offset = NULL_RTX;
8833 if (GET_CODE (y) == PLUS
8834 && GET_MODE (y) == Pmode
8835 && CONST_INT_P (XEXP (y, 1)))
8837 offset = XEXP (y, 1);
8838 y = XEXP (y, 0);
8841 if (GET_CODE (y) == UNSPEC
8842 && XINT (y, 1) == UNSPEC_TOCREL)
8844 y = XVECEXP (y, 0, 0);
8846 #ifdef HAVE_AS_TLS
8847 /* Do not associate thread-local symbols with the original
8848 constant pool symbol. */
8849 if (TARGET_XCOFF
8850 && GET_CODE (y) == SYMBOL_REF
8851 && CONSTANT_POOL_ADDRESS_P (y)
8852 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8853 return orig_x;
8854 #endif
8856 if (offset != NULL_RTX)
8857 y = gen_rtx_PLUS (Pmode, y, offset);
8858 if (!MEM_P (orig_x))
8859 return y;
8860 else
8861 return replace_equiv_address_nv (orig_x, y);
8864 if (TARGET_MACHO
8865 && GET_CODE (orig_x) == LO_SUM
8866 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8868 y = XEXP (XEXP (orig_x, 1), 0);
8869 if (GET_CODE (y) == UNSPEC
8870 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8871 return XVECEXP (y, 0, 0);
8874 return orig_x;
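/* Illustrative example (added for exposition): a TOC reference such as
(lo_sum (reg 2) (unspec [(symbol_ref "var") (reg 2)] UNSPEC_TOCREL))
is rewritten back to the bare (symbol_ref "var") so that debug info
can refer to the symbol directly. */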
8877 /* Return true if X shouldn't be emitted into the debug info.
8878 The linker doesn't like .toc section references from
8879 .debug_* sections, so reject .toc section symbols. */
8881 static bool
8882 rs6000_const_not_ok_for_debug_p (rtx x)
8884 if (GET_CODE (x) == SYMBOL_REF
8885 && CONSTANT_POOL_ADDRESS_P (x))
8887 rtx c = get_pool_constant (x);
8888 machine_mode cmode = get_pool_mode (x);
8889 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8890 return true;
8893 return false;
8896 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8898 static GTY(()) rtx rs6000_tls_symbol;
8899 static rtx
8900 rs6000_tls_get_addr (void)
8902 if (!rs6000_tls_symbol)
8903 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8905 return rs6000_tls_symbol;
8908 /* Construct the SYMBOL_REF for TLS GOT references. */
8910 static GTY(()) rtx rs6000_got_symbol;
8911 static rtx
8912 rs6000_got_sym (void)
8914 if (!rs6000_got_symbol)
8916 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8917 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8918 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8921 return rs6000_got_symbol;
8924 /* AIX Thread-Local Address support. */
8926 static rtx
8927 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8929 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8930 const char *name;
8931 char *tlsname;
8933 name = XSTR (addr, 0);
8934 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8935 or will be placed in the TLS private data section. */
8936 if (name[strlen (name) - 1] != ']'
8937 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8938 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8940 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8941 strcpy (tlsname, name);
8942 strcat (tlsname,
8943 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8944 tlsaddr = copy_rtx (addr);
8945 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8947 else
8948 tlsaddr = addr;
8950 /* Place addr into TOC constant pool. */
8951 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8953 /* Output the TOC entry and create the MEM referencing the value. */
8954 if (constant_pool_expr_p (XEXP (sym, 0))
8955 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8957 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8958 mem = gen_const_mem (Pmode, tocref);
8959 set_mem_alias_set (mem, get_TOC_alias_set ());
8961 else
8962 return sym;
8964 /* Use global-dynamic for local-dynamic. */
8965 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8966 || model == TLS_MODEL_LOCAL_DYNAMIC)
8968 /* Create new TOC reference for @m symbol. */
8969 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8970 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8971 strcpy (tlsname, "*LCM");
8972 strcat (tlsname, name + 3);
8973 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8974 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8975 tocref = create_TOC_reference (modaddr, NULL_RTX);
8976 rtx modmem = gen_const_mem (Pmode, tocref);
8977 set_mem_alias_set (modmem, get_TOC_alias_set ());
8979 rtx modreg = gen_reg_rtx (Pmode);
8980 emit_insn (gen_rtx_SET (modreg, modmem));
8982 tmpreg = gen_reg_rtx (Pmode);
8983 emit_insn (gen_rtx_SET (tmpreg, mem));
8985 dest = gen_reg_rtx (Pmode);
8986 if (TARGET_32BIT)
8987 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8988 else
8989 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8990 return dest;
8992 /* Obtain the TLS pointer: 32-bit call or 64-bit GPR 13. */
8993 else if (TARGET_32BIT)
8995 tlsreg = gen_reg_rtx (SImode);
8996 emit_insn (gen_tls_get_tpointer (tlsreg));
8998 else
8999 tlsreg = gen_rtx_REG (DImode, 13);
9001 /* Load the TOC value into temporary register. */
9002 tmpreg = gen_reg_rtx (Pmode);
9003 emit_insn (gen_rtx_SET (tmpreg, mem));
9004 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9005 gen_rtx_MINUS (Pmode, addr, tlsreg));
9007 /* Add TOC symbol value to TLS pointer. */
9008 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9010 return dest;
9013 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9014 this (thread-local) address. */
9016 static rtx
9017 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9019 rtx dest, insn;
9021 if (TARGET_XCOFF)
9022 return rs6000_legitimize_tls_address_aix (addr, model);
9024 dest = gen_reg_rtx (Pmode);
9025 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9027 rtx tlsreg;
9029 if (TARGET_64BIT)
9031 tlsreg = gen_rtx_REG (Pmode, 13);
9032 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9034 else
9036 tlsreg = gen_rtx_REG (Pmode, 2);
9037 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9039 emit_insn (insn);
9041 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9043 rtx tlsreg, tmp;
9045 tmp = gen_reg_rtx (Pmode);
9046 if (TARGET_64BIT)
9048 tlsreg = gen_rtx_REG (Pmode, 13);
9049 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9051 else
9053 tlsreg = gen_rtx_REG (Pmode, 2);
9054 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9056 emit_insn (insn);
9057 if (TARGET_64BIT)
9058 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9059 else
9060 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9061 emit_insn (insn);
9063 else
9065 rtx r3, got, tga, tmp1, tmp2, call_insn;
9067 /* We currently use relocations like @got@tlsgd for tls, which
9068 means the linker will handle allocation of tls entries, placing
9069 them in the .got section. So use a pointer to the .got section,
9070 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9071 or to secondary GOT sections used by 32-bit -fPIC. */
9072 if (TARGET_64BIT)
9073 got = gen_rtx_REG (Pmode, 2);
9074 else
9076 if (flag_pic == 1)
9077 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9078 else
9080 rtx gsym = rs6000_got_sym ();
9081 got = gen_reg_rtx (Pmode);
9082 if (flag_pic == 0)
9083 rs6000_emit_move (got, gsym, Pmode);
9084 else
9086 rtx mem, lab, last;
9088 tmp1 = gen_reg_rtx (Pmode);
9089 tmp2 = gen_reg_rtx (Pmode);
9090 mem = gen_const_mem (Pmode, tmp1);
9091 lab = gen_label_rtx ();
9092 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9093 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9094 if (TARGET_LINK_STACK)
9095 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9096 emit_move_insn (tmp2, mem);
9097 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9098 set_unique_reg_note (last, REG_EQUAL, gsym);
9103 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9105 tga = rs6000_tls_get_addr ();
9106 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9107 1, const0_rtx, Pmode);
9109 r3 = gen_rtx_REG (Pmode, 3);
9110 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9112 if (TARGET_64BIT)
9113 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9114 else
9115 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9117 else if (DEFAULT_ABI == ABI_V4)
9118 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9119 else
9120 gcc_unreachable ();
9121 call_insn = last_call_insn ();
9122 PATTERN (call_insn) = insn;
9123 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9124 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9125 pic_offset_table_rtx);
9127 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9129 tga = rs6000_tls_get_addr ();
9130 tmp1 = gen_reg_rtx (Pmode);
9131 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9132 1, const0_rtx, Pmode);
9134 r3 = gen_rtx_REG (Pmode, 3);
9135 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9137 if (TARGET_64BIT)
9138 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9139 else
9140 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9142 else if (DEFAULT_ABI == ABI_V4)
9143 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9144 else
9145 gcc_unreachable ();
9146 call_insn = last_call_insn ();
9147 PATTERN (call_insn) = insn;
9148 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9149 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9150 pic_offset_table_rtx);
9152 if (rs6000_tls_size == 16)
9154 if (TARGET_64BIT)
9155 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9156 else
9157 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9159 else if (rs6000_tls_size == 32)
9161 tmp2 = gen_reg_rtx (Pmode);
9162 if (TARGET_64BIT)
9163 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9164 else
9165 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9166 emit_insn (insn);
9167 if (TARGET_64BIT)
9168 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9169 else
9170 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9172 else
9174 tmp2 = gen_reg_rtx (Pmode);
9175 if (TARGET_64BIT)
9176 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9177 else
9178 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9179 emit_insn (insn);
9180 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9182 emit_insn (insn);
9184 else
9186 /* IE, or 64-bit offset LE. */
9187 tmp2 = gen_reg_rtx (Pmode);
9188 if (TARGET_64BIT)
9189 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9190 else
9191 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9192 emit_insn (insn);
9193 if (TARGET_64BIT)
9194 insn = gen_tls_tls_64 (dest, tmp2, addr);
9195 else
9196 insn = gen_tls_tls_32 (dest, tmp2, addr);
9197 emit_insn (insn);
9201 return dest;
9204 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9206 static bool
9207 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9209 if (GET_CODE (x) == HIGH
9210 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9211 return true;
9213 /* A TLS symbol in the TOC cannot contain a sum. */
9214 if (GET_CODE (x) == CONST
9215 && GET_CODE (XEXP (x, 0)) == PLUS
9216 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9217 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9218 return true;
9220 /* Do not place an ELF TLS symbol in the constant pool. */
9221 return TARGET_ELF && tls_referenced_p (x);
9224 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9225 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9226 can be addressed relative to the toc pointer. */
9228 static bool
9229 use_toc_relative_ref (rtx sym, machine_mode mode)
9231 return ((constant_pool_expr_p (sym)
9232 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9233 get_pool_mode (sym)))
9234 || (TARGET_CMODEL == CMODEL_MEDIUM
9235 && SYMBOL_REF_LOCAL_P (sym)
9236 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9239 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9240 replace the input X, or the original X if no replacement is called for.
9241 The output parameter *WIN is 1 if the calling macro should goto WIN,
9242 0 if it should not.
9244 For RS/6000, we wish to handle large displacements off a base
9245 register by splitting the addend across an addis and the mem insn.
9246 This cuts the number of extra insns needed from 3 to 1.
9248 On Darwin, we use this to generate code for floating point constants.
9249 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9250 The Darwin code is inside #if TARGET_MACHO because only then are the
9251 machopic_* functions defined. */
9252 static rtx
9253 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9254 int opnum, int type,
9255 int ind_levels ATTRIBUTE_UNUSED, int *win)
9257 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9258 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9260 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9261 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9262 if (reg_offset_p
9263 && opnum == 1
9264 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9265 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9266 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9267 && TARGET_P9_VECTOR)
9268 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9269 && TARGET_P9_VECTOR)))
9270 reg_offset_p = false;
9272 /* We must recognize output that we have already generated ourselves. */
9273 if (GET_CODE (x) == PLUS
9274 && GET_CODE (XEXP (x, 0)) == PLUS
9275 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9276 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9277 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9279 if (TARGET_DEBUG_ADDR)
9281 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9282 debug_rtx (x);
9284 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9285 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9286 opnum, (enum reload_type) type);
9287 *win = 1;
9288 return x;
9291 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9292 if (GET_CODE (x) == LO_SUM
9293 && GET_CODE (XEXP (x, 0)) == HIGH)
9295 if (TARGET_DEBUG_ADDR)
9297 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9298 debug_rtx (x);
9300 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9301 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9302 opnum, (enum reload_type) type);
9303 *win = 1;
9304 return x;
9307 #if TARGET_MACHO
9308 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9309 && GET_CODE (x) == LO_SUM
9310 && GET_CODE (XEXP (x, 0)) == PLUS
9311 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9312 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9313 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9314 && machopic_operand_p (XEXP (x, 1)))
9316 /* Result of previous invocation of this function on Darwin
9317 floating point constant. */
9318 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9319 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9320 opnum, (enum reload_type) type);
9321 *win = 1;
9322 return x;
9324 #endif
9326 if (TARGET_CMODEL != CMODEL_SMALL
9327 && reg_offset_p
9328 && !quad_offset_p
9329 && small_toc_ref (x, VOIDmode))
9331 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9332 x = gen_rtx_LO_SUM (Pmode, hi, x);
9333 if (TARGET_DEBUG_ADDR)
9335 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9336 debug_rtx (x);
9338 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9339 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9340 opnum, (enum reload_type) type);
9341 *win = 1;
9342 return x;
9345 if (GET_CODE (x) == PLUS
9346 && REG_P (XEXP (x, 0))
9347 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9348 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9349 && CONST_INT_P (XEXP (x, 1))
9350 && reg_offset_p
9351 && !SPE_VECTOR_MODE (mode)
9352 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9353 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9355 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9356 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9357 HOST_WIDE_INT high
9358 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
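 /* The XOR/subtract pairs sign-extend the 16-bit and 32-bit halves, so
    e.g. val = 0x12348000 splits into low = -0x8000 and high = 0x12350000,
    with high + low == val and low fitting a signed 16-bit displacement.  */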
9360 /* Check for 32-bit overflow or quad addresses with one of the
9361 four least significant bits set. */
9362 if (high + low != val
9363 || (quad_offset_p && (low & 0xf)))
9365 *win = 0;
9366 return x;
9369 /* Reload the high part into a base reg; leave the low part
9370 in the mem directly. */
9372 x = gen_rtx_PLUS (GET_MODE (x),
9373 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9374 GEN_INT (high)),
9375 GEN_INT (low));
9377 if (TARGET_DEBUG_ADDR)
9379 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9380 debug_rtx (x);
9382 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9383 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9384 opnum, (enum reload_type) type);
9385 *win = 1;
9386 return x;
9389 if (GET_CODE (x) == SYMBOL_REF
9390 && reg_offset_p
9391 && !quad_offset_p
9392 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9393 && !SPE_VECTOR_MODE (mode)
9394 #if TARGET_MACHO
9395 && DEFAULT_ABI == ABI_DARWIN
9396 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9397 && machopic_symbol_defined_p (x)
9398 #else
9399 && DEFAULT_ABI == ABI_V4
9400 && !flag_pic
9401 #endif
9402 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9403 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9404 without fprs.
9405 ??? Assume floating point reg based on mode? This assumption is
9406 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9407 where reload ends up doing a DFmode load of a constant from
9408 mem using two gprs. Unfortunately, at this point reload
9409 hasn't yet selected regs so poking around in reload data
9410 won't help and even if we could figure out the regs reliably,
9411 we'd still want to allow this transformation when the mem is
9412 naturally aligned. Since we say the address is good here, we
9413 can't disable offsets from LO_SUMs in mem_operand_gpr.
9414 FIXME: Allow offset from lo_sum for other modes too, when
9415 mem is sufficiently aligned.
9417 Also disallow this if the type can go in VMX/Altivec registers, since
9418 those registers do not have d-form (reg+offset) address modes. */
9419 && !reg_addr[mode].scalar_in_vmx_p
9420 && mode != TFmode
9421 && mode != TDmode
9422 && mode != IFmode
9423 && mode != KFmode
9424 && (mode != TImode || !TARGET_VSX_TIMODE)
9425 && mode != PTImode
9426 && (mode != DImode || TARGET_POWERPC64)
9427 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9428 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9430 #if TARGET_MACHO
9431 if (flag_pic)
9433 rtx offset = machopic_gen_offset (x);
9434 x = gen_rtx_LO_SUM (GET_MODE (x),
9435 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9436 gen_rtx_HIGH (Pmode, offset)), offset);
9438 else
9439 #endif
9440 x = gen_rtx_LO_SUM (GET_MODE (x),
9441 gen_rtx_HIGH (Pmode, x), x);
9443 if (TARGET_DEBUG_ADDR)
9445 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9446 debug_rtx (x);
9448 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9449 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9450 opnum, (enum reload_type) type);
9451 *win = 1;
9452 return x;
9455 /* Reload an offset address wrapped by an AND that represents the
9456 masking of the lower bits. Strip the outer AND and let reload
9457 convert the offset address into an indirect address. For VSX,
9458 force reload to create the address with an AND in a separate
9459 register, because we can't guarantee an altivec register will
9460 be used. */
9461 if (VECTOR_MEM_ALTIVEC_P (mode)
9462 && GET_CODE (x) == AND
9463 && GET_CODE (XEXP (x, 0)) == PLUS
9464 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9465 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9466 && GET_CODE (XEXP (x, 1)) == CONST_INT
9467 && INTVAL (XEXP (x, 1)) == -16)
9469 x = XEXP (x, 0);
9470 *win = 1;
9471 return x;
9474 if (TARGET_TOC
9475 && reg_offset_p
9476 && !quad_offset_p
9477 && GET_CODE (x) == SYMBOL_REF
9478 && use_toc_relative_ref (x, mode))
9480 x = create_TOC_reference (x, NULL_RTX);
9481 if (TARGET_CMODEL != CMODEL_SMALL)
9483 if (TARGET_DEBUG_ADDR)
9485 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9486 debug_rtx (x);
9488 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9489 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9490 opnum, (enum reload_type) type);
9492 *win = 1;
9493 return x;
9495 *win = 0;
9496 return x;
9499 /* Debug version of rs6000_legitimize_reload_address. */
9500 static rtx
9501 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9502 int opnum, int type,
9503 int ind_levels, int *win)
9505 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9506 ind_levels, win);
9507 fprintf (stderr,
9508 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9509 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9510 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9511 debug_rtx (x);
9513 if (x == ret)
9514 fprintf (stderr, "Same address returned\n");
9515 else if (!ret)
9516 fprintf (stderr, "NULL returned\n");
9517 else
9519 fprintf (stderr, "New address:\n");
9520 debug_rtx (ret);
9523 return ret;
9526 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9527 that is a valid memory address for an instruction.
9528 The MODE argument is the machine mode for the MEM expression
9529 that wants to use this address.
9531 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
9532 refers to a constant pool entry of an address (or the sum of it
9533 plus a constant), a short (16-bit signed) constant plus a register,
9534 the sum of two registers, or a register indirect, possibly with an
9535 auto-increment. For DFmode, DDmode and DImode with a constant plus
9536 register, we must ensure that both words are addressable, or on
9537 PowerPC64 that the offset is word aligned.
9539 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9540 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9541 because adjacent memory cells are accessed by adding word-sized offsets
9542 during assembly output. */
9543 static bool
9544 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9546 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9547 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9549 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9550 if (VECTOR_MEM_ALTIVEC_P (mode)
9551 && GET_CODE (x) == AND
9552 && GET_CODE (XEXP (x, 1)) == CONST_INT
9553 && INTVAL (XEXP (x, 1)) == -16)
9554 x = XEXP (x, 0);
9556 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9557 return 0;
9558 if (legitimate_indirect_address_p (x, reg_ok_strict))
9559 return 1;
9560 if (TARGET_UPDATE
9561 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9562 && mode_supports_pre_incdec_p (mode)
9563 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9564 return 1;
9565 /* Handle restricted vector d-form offsets in ISA 3.0. */
9566 if (quad_offset_p)
9568 if (quad_address_p (x, mode, reg_ok_strict))
9569 return 1;
9571 else if (virtual_stack_registers_memory_p (x))
9572 return 1;
9574 else if (reg_offset_p)
9576 if (legitimate_small_data_p (mode, x))
9577 return 1;
9578 if (legitimate_constant_pool_address_p (x, mode,
9579 reg_ok_strict || lra_in_progress))
9580 return 1;
9581 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9582 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9583 return 1;
9586 /* For TImode, if we have TImode in VSX registers, only allow register
9587 indirect addresses. This will allow the values to go in either GPRs
9588 or VSX registers without reloading. The vector types would tend to
9589 go into VSX registers, so we allow REG+REG, while TImode seems
9590 somewhat split, in that some uses are GPR based, and some VSX based. */
9591 /* FIXME: We could loosen this by changing the following to
9592 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9593 but currently we cannot allow REG+REG addressing for TImode. See
9594 PR72827 for complete details on how this ends up hoodwinking DSE. */
9595 if (mode == TImode && TARGET_VSX_TIMODE)
9596 return 0;
9597 /* If not REG_OK_STRICT (i.e. before reload), accept any stack offset. */
9598 if (! reg_ok_strict
9599 && reg_offset_p
9600 && GET_CODE (x) == PLUS
9601 && GET_CODE (XEXP (x, 0)) == REG
9602 && (XEXP (x, 0) == virtual_stack_vars_rtx
9603 || XEXP (x, 0) == arg_pointer_rtx)
9604 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9605 return 1;
9606 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9607 return 1;
9608 if (!FLOAT128_2REG_P (mode)
9609 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9610 || TARGET_POWERPC64
9611 || (mode != DFmode && mode != DDmode)
9612 || (TARGET_E500_DOUBLE && mode != DDmode))
9613 && (TARGET_POWERPC64 || mode != DImode)
9614 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9615 && mode != PTImode
9616 && !avoiding_indexed_address_p (mode)
9617 && legitimate_indexed_address_p (x, reg_ok_strict))
9618 return 1;
9619 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9620 && mode_supports_pre_modify_p (mode)
9621 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9622 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9623 reg_ok_strict, false)
9624 || (!avoiding_indexed_address_p (mode)
9625 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9626 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9627 return 1;
9628 if (reg_offset_p && !quad_offset_p
9629 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9630 return 1;
9631 return 0;
9634 /* Debug version of rs6000_legitimate_address_p. */
9635 static bool
9636 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9637 bool reg_ok_strict)
9639 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9640 fprintf (stderr,
9641 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9642 "strict = %d, reload = %s, code = %s\n",
9643 ret ? "true" : "false",
9644 GET_MODE_NAME (mode),
9645 reg_ok_strict,
9646 (reload_completed
9647 ? "after"
9648 : (reload_in_progress ? "progress" : "before")),
9649 GET_RTX_NAME (GET_CODE (x)));
9650 debug_rtx (x);
9652 return ret;
9655 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9657 static bool
9658 rs6000_mode_dependent_address_p (const_rtx addr,
9659 addr_space_t as ATTRIBUTE_UNUSED)
9661 return rs6000_mode_dependent_address_ptr (addr);
9664 /* Go to LABEL if ADDR (a legitimate address expression)
9665 has an effect that depends on the machine mode it is used for.
9667 On the RS/6000 this is true of all integral offsets (since AltiVec
9668 and VSX modes don't allow them) and of any pre-increment or decrement.
9670 ??? Except that due to conceptual problems in offsettable_address_p
9671 we can't really report the problems of integral offsets. So leave
9672 this assuming that the adjustable offset must be valid for the
9673 sub-words of a TFmode operand, which is what we had before. */
9675 static bool
9676 rs6000_mode_dependent_address (const_rtx addr)
9678 switch (GET_CODE (addr))
9680 case PLUS:
9681 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9682 is considered a legitimate address before reload, so there
9683 are no offset restrictions in that case. Note that this
9684 condition is safe in strict mode because any address involving
9685 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9686 been rejected as illegitimate. */
9687 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9688 && XEXP (addr, 0) != arg_pointer_rtx
9689 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9691 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9692 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
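 /* I.e. the address is mode-dependent unless both the offset and the
    offset of a 16-byte operand's last word (+8 with 64-bit word accesses,
    +12 with 32-bit ones) fit in a signed 16-bit displacement.  */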
9694 break;
9696 case LO_SUM:
9697 /* Anything in the constant pool is sufficiently aligned that
9698 all bytes have the same high part address. */
9699 return !legitimate_constant_pool_address_p (addr, QImode, false);
9701 /* Auto-increment cases are now treated generically in recog.c. */
9702 case PRE_MODIFY:
9703 return TARGET_UPDATE;
9705 /* AND is only allowed in Altivec loads. */
9706 case AND:
9707 return true;
9709 default:
9710 break;
9713 return false;
9716 /* Debug version of rs6000_mode_dependent_address. */
9717 static bool
9718 rs6000_debug_mode_dependent_address (const_rtx addr)
9720 bool ret = rs6000_mode_dependent_address (addr);
9722 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9723 ret ? "true" : "false");
9724 debug_rtx (addr);
9726 return ret;
9729 /* Implement FIND_BASE_TERM. */
9732 rs6000_find_base_term (rtx op)
9734 rtx base;
9736 base = op;
9737 if (GET_CODE (base) == CONST)
9738 base = XEXP (base, 0);
9739 if (GET_CODE (base) == PLUS)
9740 base = XEXP (base, 0);
9741 if (GET_CODE (base) == UNSPEC)
9742 switch (XINT (base, 1))
9744 case UNSPEC_TOCREL:
9745 case UNSPEC_MACHOPIC_OFFSET:
9746 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9747 for aliasing purposes. */
9748 return XVECEXP (base, 0, 0);
9751 return op;
9754 /* More elaborate version of recog's offsettable_memref_p predicate
9755 that works around the ??? note of rs6000_mode_dependent_address.
9756 In particular it accepts
9758 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9760 in 32-bit mode, which the recog predicate rejects. */
9762 static bool
9763 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9765 bool worst_case;
9767 if (!MEM_P (op))
9768 return false;
9770 /* First mimic offsettable_memref_p. */
9771 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9772 return true;
9774 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9775 the latter predicate knows nothing about the mode of the memory
9776 reference and, therefore, assumes that it is the largest supported
9777 mode (TFmode). As a consequence, legitimate offsettable memory
9778 references are rejected. rs6000_legitimate_offset_address_p contains
9779 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9780 at least with a little bit of help here given that we know the
9781 actual registers used. */
9782 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9783 || GET_MODE_SIZE (reg_mode) == 4);
9784 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9785 true, worst_case);
9788 /* Determine the reassociation width to be used in reassociate_bb.
9789 This takes into account how many parallel operations we
9790 can actually do of a given type, and also the latency.
9792 int add/sub 6/cycle
9793 mul 2/cycle
9794 vect add/sub/mul 2/cycle
9795 fp add/sub/mul 2/cycle
9796 dfp 1/cycle
9799 static int
9800 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9801 enum machine_mode mode)
9803 switch (rs6000_cpu)
9805 case PROCESSOR_POWER8:
9806 case PROCESSOR_POWER9:
9807 if (DECIMAL_FLOAT_MODE_P (mode))
9808 return 1;
9809 if (VECTOR_MODE_P (mode))
9810 return 4;
9811 if (INTEGRAL_MODE_P (mode))
9812 return opc == MULT_EXPR ? 4 : 6;
9813 if (FLOAT_MODE_P (mode))
9814 return 4;
9815 break;
9816 default:
9817 break;
9819 return 1;
9822 /* Change register usage conditional on target flags. */
9823 static void
9824 rs6000_conditional_register_usage (void)
9826 int i;
9828 if (TARGET_DEBUG_TARGET)
9829 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9831 /* Set MQ register fixed (already call_used) so that it will not be
9832 allocated. */
9833 fixed_regs[64] = 1;
9835 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9836 if (TARGET_64BIT)
9837 fixed_regs[13] = call_used_regs[13]
9838 = call_really_used_regs[13] = 1;
9840 /* Conditionally disable FPRs. */
9841 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9842 for (i = 32; i < 64; i++)
9843 fixed_regs[i] = call_used_regs[i]
9844 = call_really_used_regs[i] = 1;
9846 /* The TOC register is not killed across calls in a way that is
9847 visible to the compiler. */
9848 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9849 call_really_used_regs[2] = 0;
9851 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9852 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9854 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9855 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9856 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9857 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9859 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9860 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9861 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9862 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9864 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9865 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9866 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9868 if (TARGET_SPE)
9870 global_regs[SPEFSCR_REGNO] = 1;
9871 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9872 registers in prologues and epilogues. We no longer use r14
9873 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9874 pool for link-compatibility with older versions of GCC. Once
9875 "old" code has died out, we can return r14 to the allocation
9876 pool. */
9877 fixed_regs[14]
9878 = call_used_regs[14]
9879 = call_really_used_regs[14] = 1;
9882 if (!TARGET_ALTIVEC && !TARGET_VSX)
9884 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9885 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9886 call_really_used_regs[VRSAVE_REGNO] = 1;
9889 if (TARGET_ALTIVEC || TARGET_VSX)
9890 global_regs[VSCR_REGNO] = 1;
9892 if (TARGET_ALTIVEC_ABI)
9894 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9895 call_used_regs[i] = call_really_used_regs[i] = 1;
9897 /* AIX reserves VR20:31 in non-extended ABI mode. */
9898 if (TARGET_XCOFF)
9899 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9900 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9905 /* Output insns to set DEST equal to the constant SOURCE as a series of
9906 lis, ori and shl instructions and return TRUE. */
9908 bool
9909 rs6000_emit_set_const (rtx dest, rtx source)
9911 machine_mode mode = GET_MODE (dest);
9912 rtx temp, set;
9913 rtx_insn *insn;
9914 HOST_WIDE_INT c;
9916 gcc_checking_assert (CONST_INT_P (source));
9917 c = INTVAL (source);
9918 switch (mode)
9920 case QImode:
9921 case HImode:
9922 emit_insn (gen_rtx_SET (dest, source));
9923 return true;
9925 case SImode:
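 /* Materialize the high 16 bits (a lis), then IOR in the low
    16 bits (an ori).  */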
9926 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9928 emit_insn (gen_rtx_SET (copy_rtx (temp),
9929 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9930 emit_insn (gen_rtx_SET (dest,
9931 gen_rtx_IOR (SImode, copy_rtx (temp),
9932 GEN_INT (c & 0xffff))));
9933 break;
9935 case DImode:
9936 if (!TARGET_POWERPC64)
9938 rtx hi, lo;
9940 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9941 DImode);
9942 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9943 DImode);
9944 emit_move_insn (hi, GEN_INT (c >> 32));
9945 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9946 emit_move_insn (lo, GEN_INT (c));
9948 else
9949 rs6000_emit_set_long_const (dest, c);
9950 break;
9952 default:
9953 gcc_unreachable ();
9956 insn = get_last_insn ();
9957 set = single_set (insn);
9958 if (! CONSTANT_P (SET_SRC (set)))
9959 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9961 return true;
9964 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9965 Output insns to set DEST equal to the constant C as a series of
9966 lis, ori and shl instructions. */
9968 static void
9969 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9971 rtx temp;
9972 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9974 ud1 = c & 0xffff;
9975 c = c >> 16;
9976 ud2 = c & 0xffff;
9977 c = c >> 16;
9978 ud3 = c & 0xffff;
9979 c = c >> 16;
9980 ud4 = c & 0xffff;
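 /* For example, c = 0x123456789abcdef0 splits into ud1 = 0xdef0,
    ud2 = 0x9abc, ud3 = 0x5678 and ud4 = 0x1234.  */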
9982 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9983 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9984 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9986 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9987 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9989 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9991 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9992 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9993 if (ud1 != 0)
9994 emit_move_insn (dest,
9995 gen_rtx_IOR (DImode, copy_rtx (temp),
9996 GEN_INT (ud1)));
9998 else if (ud3 == 0 && ud4 == 0)
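 /* The constant fits in 32 bits but bit 31 is set, so a lis/ori pair
    would sign-extend into the high word; build the low word and then
    explicitly zero-extend it.  */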
10000 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10002 gcc_assert (ud2 & 0x8000);
10003 emit_move_insn (copy_rtx (temp),
10004 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10005 if (ud1 != 0)
10006 emit_move_insn (copy_rtx (temp),
10007 gen_rtx_IOR (DImode, copy_rtx (temp),
10008 GEN_INT (ud1)));
10009 emit_move_insn (dest,
10010 gen_rtx_ZERO_EXTEND (DImode,
10011 gen_lowpart (SImode,
10012 copy_rtx (temp))));
10014 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10015 || (ud4 == 0 && ! (ud3 & 0x8000)))
10017 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10019 emit_move_insn (copy_rtx (temp),
10020 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10021 if (ud2 != 0)
10022 emit_move_insn (copy_rtx (temp),
10023 gen_rtx_IOR (DImode, copy_rtx (temp),
10024 GEN_INT (ud2)));
10025 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10026 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10027 GEN_INT (16)));
10028 if (ud1 != 0)
10029 emit_move_insn (dest,
10030 gen_rtx_IOR (DImode, copy_rtx (temp),
10031 GEN_INT (ud1)));
10033 else
10035 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10037 emit_move_insn (copy_rtx (temp),
10038 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10039 if (ud3 != 0)
10040 emit_move_insn (copy_rtx (temp),
10041 gen_rtx_IOR (DImode, copy_rtx (temp),
10042 GEN_INT (ud3)));
10044 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10045 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10046 GEN_INT (32)));
10047 if (ud2 != 0)
10048 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10049 gen_rtx_IOR (DImode, copy_rtx (temp),
10050 GEN_INT (ud2 << 16)));
10051 if (ud1 != 0)
10052 emit_move_insn (dest,
10053 gen_rtx_IOR (DImode, copy_rtx (temp),
10054 GEN_INT (ud1)));
10058 /* Helper for the following function. Get rid of [r+r] memory refs
10059 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10061 static void
10062 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10064 if (reload_in_progress)
10065 return;
10067 if (GET_CODE (operands[0]) == MEM
10068 && GET_CODE (XEXP (operands[0], 0)) != REG
10069 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10070 GET_MODE (operands[0]), false))
10071 operands[0]
10072 = replace_equiv_address (operands[0],
10073 copy_addr_to_reg (XEXP (operands[0], 0)));
10075 if (GET_CODE (operands[1]) == MEM
10076 && GET_CODE (XEXP (operands[1], 0)) != REG
10077 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10078 GET_MODE (operands[1]), false))
10079 operands[1]
10080 = replace_equiv_address (operands[1],
10081 copy_addr_to_reg (XEXP (operands[1], 0)));
10084 /* Generate a vector of constants to permute MODE for a little-endian
10085 storage operation by swapping the two halves of a vector. */
10086 static rtvec
10087 rs6000_const_vec (machine_mode mode)
10089 int i, subparts;
10090 rtvec v;
10092 switch (mode)
10094 case V1TImode:
10095 subparts = 1;
10096 break;
10097 case V2DFmode:
10098 case V2DImode:
10099 subparts = 2;
10100 break;
10101 case V4SFmode:
10102 case V4SImode:
10103 subparts = 4;
10104 break;
10105 case V8HImode:
10106 subparts = 8;
10107 break;
10108 case V16QImode:
10109 subparts = 16;
10110 break;
10111 default:
10112 gcc_unreachable();
10115 v = rtvec_alloc (subparts);
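 /* Swap the two halves: e.g. for V4SImode this builds the selector
    { 2, 3, 0, 1 }.  */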
10117 for (i = 0; i < subparts / 2; ++i)
10118 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10119 for (i = subparts / 2; i < subparts; ++i)
10120 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10122 return v;
10125 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10126 for a VSX load or store operation. */
10128 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10130 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10131 128-bit integers if they are allowed in VSX registers. */
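 /* A 128-bit rotate by 64 swaps the two doublewords, the same
    permutation the VEC_SELECT below expresses.  */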
10132 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
10133 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10134 else
10136 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10137 return gen_rtx_VEC_SELECT (mode, source, par);
10141 /* Emit a little-endian load from vector memory location SOURCE to VSX
10142 register DEST in mode MODE. The load is done with two permuting
10143 insns that represent an lxvd2x and an xxpermdi. */
10144 void
10145 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10147 rtx tmp, permute_mem, permute_reg;
10149 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10150 V1TImode). */
10151 if (mode == TImode || mode == V1TImode)
10153 mode = V2DImode;
10154 dest = gen_lowpart (V2DImode, dest);
10155 source = adjust_address (source, V2DImode, 0);
10158 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10159 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10160 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10161 emit_insn (gen_rtx_SET (tmp, permute_mem));
10162 emit_insn (gen_rtx_SET (dest, permute_reg));
10165 /* Emit a little-endian store to vector memory location DEST from VSX
10166 register SOURCE in mode MODE. The store is done with two permuting
10167 insns that represent an xxpermdi and an stxvd2x. */
10168 void
10169 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10171 rtx tmp, permute_src, permute_tmp;
10173 /* This should never be called during or after reload, because it does
10174 not re-permute the source register. It is intended only for use
10175 during expand. */
10176 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10178 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10179 V1TImode). */
10180 if (mode == TImode || mode == V1TImode)
10182 mode = V2DImode;
10183 dest = adjust_address (dest, V2DImode, 0);
10184 source = gen_lowpart (V2DImode, source);
10187 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10188 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10189 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10190 emit_insn (gen_rtx_SET (tmp, permute_src));
10191 emit_insn (gen_rtx_SET (dest, permute_tmp));
10194 /* Emit a sequence representing a little-endian VSX load or store,
10195 moving data from SOURCE to DEST in mode MODE. This is done
10196 separately from rs6000_emit_move to ensure it is called only
10197 during expand. LE VSX loads and stores introduced later are
10198 handled with a split. The expand-time RTL generation allows
10199 us to optimize away redundant pairs of register-permutes. */
10200 void
10201 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10203 gcc_assert (!BYTES_BIG_ENDIAN
10204 && VECTOR_MEM_VSX_P (mode)
10205 && !TARGET_P9_VECTOR
10206 && !gpr_or_gpr_p (dest, source)
10207 && (MEM_P (source) ^ MEM_P (dest)));
10209 if (MEM_P (source))
10211 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10212 rs6000_emit_le_vsx_load (dest, source, mode);
10214 else
10216 if (!REG_P (source))
10217 source = force_reg (mode, source);
10218 rs6000_emit_le_vsx_store (dest, source, mode);
10222 /* Emit a move from SOURCE to DEST in mode MODE. */
10223 void
10224 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10226 rtx operands[2];
10227 operands[0] = dest;
10228 operands[1] = source;
10230 if (TARGET_DEBUG_ADDR)
10232 fprintf (stderr,
10233 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10234 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10235 GET_MODE_NAME (mode),
10236 reload_in_progress,
10237 reload_completed,
10238 can_create_pseudo_p ());
10239 debug_rtx (dest);
10240 fprintf (stderr, "source:\n");
10241 debug_rtx (source);
10244 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10245 if (CONST_WIDE_INT_P (operands[1])
10246 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10248 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10249 gcc_unreachable ();
10252 /* Check if GCC is setting up a block move that will end up using FP
10253 registers as temporaries. We must make sure this is acceptable. */
10254 if (GET_CODE (operands[0]) == MEM
10255 && GET_CODE (operands[1]) == MEM
10256 && mode == DImode
10257 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10258 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10259 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10260 ? 32 : MEM_ALIGN (operands[0])))
10261 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10262 ? 32
10263 : MEM_ALIGN (operands[1]))))
10264 && ! MEM_VOLATILE_P (operands [0])
10265 && ! MEM_VOLATILE_P (operands [1]))
10267 emit_move_insn (adjust_address (operands[0], SImode, 0),
10268 adjust_address (operands[1], SImode, 0));
10269 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10270 adjust_address (copy_rtx (operands[1]), SImode, 4));
10271 return;
10274 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10275 && !gpc_reg_operand (operands[1], mode))
10276 operands[1] = force_reg (mode, operands[1]);
10278 /* Recognize the case where operand[1] is a reference to thread-local
10279 data and load its address to a register. */
10280 if (tls_referenced_p (operands[1]))
10282 enum tls_model model;
10283 rtx tmp = operands[1];
10284 rtx addend = NULL;
10286 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10288 addend = XEXP (XEXP (tmp, 0), 1);
10289 tmp = XEXP (XEXP (tmp, 0), 0);
10292 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10293 model = SYMBOL_REF_TLS_MODEL (tmp);
10294 gcc_assert (model != 0);
10296 tmp = rs6000_legitimize_tls_address (tmp, model);
10297 if (addend)
10299 tmp = gen_rtx_PLUS (mode, tmp, addend);
10300 tmp = force_operand (tmp, operands[0]);
10302 operands[1] = tmp;
10305 /* Handle the case where reload calls us with an invalid address. */
10306 if (reload_in_progress && mode == Pmode
10307 && (! general_operand (operands[1], mode)
10308 || ! nonimmediate_operand (operands[0], mode)))
10309 goto emit_set;
10311 /* 128-bit constant floating-point values on Darwin should really be loaded
10312 as two parts. However, this premature splitting is a problem when DFmode
10313 values can go into Altivec registers. */
10314 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10315 && GET_CODE (operands[1]) == CONST_DOUBLE)
10317 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10318 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10319 DFmode);
10320 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10321 GET_MODE_SIZE (DFmode)),
10322 simplify_gen_subreg (DFmode, operands[1], mode,
10323 GET_MODE_SIZE (DFmode)),
10324 DFmode);
10325 return;
10328 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10329 cfun->machine->sdmode_stack_slot =
10330 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10333 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10334 p1:SD) if p1 is not of floating point class and p0 is spilled as
10335 we can have no analogous movsd_store for this. */
10336 if (lra_in_progress && mode == DDmode
10337 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10338 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10339 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10340 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10342 enum reg_class cl;
10343 int regno = REGNO (SUBREG_REG (operands[1]));
10345 if (regno >= FIRST_PSEUDO_REGISTER)
10347 cl = reg_preferred_class (regno);
10348 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10350 if (regno >= 0 && ! FP_REGNO_P (regno))
10352 mode = SDmode;
10353 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10354 operands[1] = SUBREG_REG (operands[1]);
10357 if (lra_in_progress
10358 && mode == SDmode
10359 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10360 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10361 && (REG_P (operands[1])
10362 || (GET_CODE (operands[1]) == SUBREG
10363 && REG_P (SUBREG_REG (operands[1])))))
10365 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10366 ? SUBREG_REG (operands[1]) : operands[1]);
10367 enum reg_class cl;
10369 if (regno >= FIRST_PSEUDO_REGISTER)
10371 cl = reg_preferred_class (regno);
10372 gcc_assert (cl != NO_REGS);
10373 regno = ira_class_hard_regs[cl][0];
10375 if (FP_REGNO_P (regno))
10377 if (GET_MODE (operands[0]) != DDmode)
10378 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10379 emit_insn (gen_movsd_store (operands[0], operands[1]));
10381 else if (INT_REGNO_P (regno))
10382 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10383 else
10384 gcc_unreachable();
10385 return;
10387 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10388 p:DD)) if p0 is not of floating point class and p1 is spilled as
10389 we can have no analogous movsd_load for this. */
10390 if (lra_in_progress && mode == DDmode
10391 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10392 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10393 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10394 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10396 enum reg_class cl;
10397 int regno = REGNO (SUBREG_REG (operands[0]));
10399 if (regno >= FIRST_PSEUDO_REGISTER)
10401 cl = reg_preferred_class (regno);
10402 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10404 if (regno >= 0 && ! FP_REGNO_P (regno))
10406 mode = SDmode;
10407 operands[0] = SUBREG_REG (operands[0]);
10408 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10411 if (lra_in_progress
10412 && mode == SDmode
10413 && (REG_P (operands[0])
10414 || (GET_CODE (operands[0]) == SUBREG
10415 && REG_P (SUBREG_REG (operands[0]))))
10416 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10417 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10419 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10420 ? SUBREG_REG (operands[0]) : operands[0]);
10421 enum reg_class cl;
10423 if (regno >= FIRST_PSEUDO_REGISTER)
10425 cl = reg_preferred_class (regno);
10426 gcc_assert (cl != NO_REGS);
10427 regno = ira_class_hard_regs[cl][0];
10429 if (FP_REGNO_P (regno))
10431 if (GET_MODE (operands[1]) != DDmode)
10432 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10433 emit_insn (gen_movsd_load (operands[0], operands[1]));
10435 else if (INT_REGNO_P (regno))
10436 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10437 else
10438 gcc_unreachable();
10439 return;
10442 if (reload_in_progress
10443 && mode == SDmode
10444 && cfun->machine->sdmode_stack_slot != NULL_RTX
10445 && MEM_P (operands[0])
10446 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10447 && REG_P (operands[1]))
10449 if (FP_REGNO_P (REGNO (operands[1])))
10451 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10452 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10453 emit_insn (gen_movsd_store (mem, operands[1]));
10455 else if (INT_REGNO_P (REGNO (operands[1])))
10457 rtx mem = operands[0];
10458 if (BYTES_BIG_ENDIAN)
10459 mem = adjust_address_nv (mem, mode, 4);
10460 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10461 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10463 else
10464 gcc_unreachable();
10465 return;
10467 if (reload_in_progress
10468 && mode == SDmode
10469 && REG_P (operands[0])
10470 && MEM_P (operands[1])
10471 && cfun->machine->sdmode_stack_slot != NULL_RTX
10472 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10474 if (FP_REGNO_P (REGNO (operands[0])))
10476 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10477 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10478 emit_insn (gen_movsd_load (operands[0], mem));
10480 else if (INT_REGNO_P (REGNO (operands[0])))
10482 rtx mem = operands[1];
10483 if (BYTES_BIG_ENDIAN)
10484 mem = adjust_address_nv (mem, mode, 4);
10485 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10486 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10488 else
10489 gcc_unreachable();
10490 return;
10493 /* FIXME: In the long term, this switch statement should go away
10494 and be replaced by a sequence of tests based on things like
10495 mode == Pmode. */
10496 switch (mode)
10498 case HImode:
10499 case QImode:
10500 if (CONSTANT_P (operands[1])
10501 && GET_CODE (operands[1]) != CONST_INT)
10502 operands[1] = force_const_mem (mode, operands[1]);
10503 break;
10505 case TFmode:
10506 case TDmode:
10507 case IFmode:
10508 case KFmode:
10509 if (FLOAT128_2REG_P (mode))
10510 rs6000_eliminate_indexed_memrefs (operands);
10511 /* fall through */
10513 case DFmode:
10514 case DDmode:
10515 case SFmode:
10516 case SDmode:
10517 if (CONSTANT_P (operands[1])
10518 && ! easy_fp_constant (operands[1], mode))
10519 operands[1] = force_const_mem (mode, operands[1]);
10520 break;
10522 case V16QImode:
10523 case V8HImode:
10524 case V4SFmode:
10525 case V4SImode:
10526 case V4HImode:
10527 case V2SFmode:
10528 case V2SImode:
10529 case V1DImode:
10530 case V2DFmode:
10531 case V2DImode:
10532 case V1TImode:
10533 if (CONSTANT_P (operands[1])
10534 && !easy_vector_constant (operands[1], mode))
10535 operands[1] = force_const_mem (mode, operands[1]);
10536 break;
10538 case SImode:
10539 case DImode:
10540 /* Use default pattern for address of ELF small data. */
10541 if (TARGET_ELF
10542 && mode == Pmode
10543 && DEFAULT_ABI == ABI_V4
10544 && (GET_CODE (operands[1]) == SYMBOL_REF
10545 || GET_CODE (operands[1]) == CONST)
10546 && small_data_operand (operands[1], mode))
10548 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10549 return;
10552 if (DEFAULT_ABI == ABI_V4
10553 && mode == Pmode && mode == SImode
10554 && flag_pic == 1 && got_operand (operands[1], mode))
10556 emit_insn (gen_movsi_got (operands[0], operands[1]));
10557 return;
10560 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10561 && TARGET_NO_TOC
10562 && ! flag_pic
10563 && mode == Pmode
10564 && CONSTANT_P (operands[1])
10565 && GET_CODE (operands[1]) != HIGH
10566 && GET_CODE (operands[1]) != CONST_INT)
10568 rtx target = (!can_create_pseudo_p ()
10569 ? operands[0]
10570 : gen_reg_rtx (mode));
10572 /* If this is a function address on -mcall-aixdesc,
10573 convert it to the address of the descriptor. */
10574 if (DEFAULT_ABI == ABI_AIX
10575 && GET_CODE (operands[1]) == SYMBOL_REF
10576 && XSTR (operands[1], 0)[0] == '.')
10578 const char *name = XSTR (operands[1], 0);
10579 rtx new_ref;
10580 while (*name == '.')
10581 name++;
10582 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10583 CONSTANT_POOL_ADDRESS_P (new_ref)
10584 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10585 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10586 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10587 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10588 operands[1] = new_ref;
10591 if (DEFAULT_ABI == ABI_DARWIN)
10593 #if TARGET_MACHO
10594 if (MACHO_DYNAMIC_NO_PIC_P)
10596 /* Take care of any required data indirection. */
10597 operands[1] = rs6000_machopic_legitimize_pic_address (
10598 operands[1], mode, operands[0]);
10599 if (operands[0] != operands[1])
10600 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10601 return;
10603 #endif
10604 emit_insn (gen_macho_high (target, operands[1]));
10605 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10606 return;
10609 emit_insn (gen_elf_high (target, operands[1]));
10610 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10611 return;
10614 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10615 and we have put it in the TOC, we just need to make a TOC-relative
10616 reference to it. */
10617 if (TARGET_TOC
10618 && GET_CODE (operands[1]) == SYMBOL_REF
10619 && use_toc_relative_ref (operands[1], mode))
10620 operands[1] = create_TOC_reference (operands[1], operands[0]);
10621 else if (mode == Pmode
10622 && CONSTANT_P (operands[1])
10623 && GET_CODE (operands[1]) != HIGH
10624 && ((GET_CODE (operands[1]) != CONST_INT
10625 && ! easy_fp_constant (operands[1], mode))
10626 || (GET_CODE (operands[1]) == CONST_INT
10627 && (num_insns_constant (operands[1], mode)
10628 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10629 || (GET_CODE (operands[0]) == REG
10630 && FP_REGNO_P (REGNO (operands[0]))))
10631 && !toc_relative_expr_p (operands[1], false)
10632 && (TARGET_CMODEL == CMODEL_SMALL
10633 || can_create_pseudo_p ()
10634 || (REG_P (operands[0])
10635 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10638 #if TARGET_MACHO
10639 /* Darwin uses a special PIC legitimizer. */
10640 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10642 operands[1] =
10643 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10644 operands[0]);
10645 if (operands[0] != operands[1])
10646 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10647 return;
10649 #endif
10651 /* If we are to limit the number of things we put in the TOC and
10652 this is a symbol plus a constant we can add in one insn,
10653 just put the symbol in the TOC and add the constant. Don't do
10654 this if reload is in progress. */
10655 if (GET_CODE (operands[1]) == CONST
10656 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10657 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10658 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10659 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10660 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10661 && ! side_effects_p (operands[0]))
10663 rtx sym =
10664 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10665 rtx other = XEXP (XEXP (operands[1], 0), 1);
10667 sym = force_reg (mode, sym);
10668 emit_insn (gen_add3_insn (operands[0], sym, other));
10669 return;
10672 operands[1] = force_const_mem (mode, operands[1]);
10674 if (TARGET_TOC
10675 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10676 && constant_pool_expr_p (XEXP (operands[1], 0))
10677 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10678 get_pool_constant (XEXP (operands[1], 0)),
10679 get_pool_mode (XEXP (operands[1], 0))))
10681 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10682 operands[0]);
10683 operands[1] = gen_const_mem (mode, tocref);
10684 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10687 break;
10689 case TImode:
10690 if (!VECTOR_MEM_VSX_P (TImode))
10691 rs6000_eliminate_indexed_memrefs (operands);
10692 break;
10694 case PTImode:
10695 rs6000_eliminate_indexed_memrefs (operands);
10696 break;
10698 default:
10699 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10702 /* Above, we may have called force_const_mem which may have returned
10703 an invalid address. If we can, fix this up; otherwise, reload will
10704 have to deal with it. */
10705 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10706 operands[1] = validize_mem (operands[1]);
10708 emit_set:
10709 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10712 /* Return true if a structure, union or array containing FIELD should be
10713 accessed using `BLKmode'.
10715 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10716 entire thing in a DI and use subregs to access the internals.
10717 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10718 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10719 best thing to do is set structs to BLKmode and avoid Severe Tire
10720 Damage.
10722 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10723 fit in one GPR, whereas DI still needs two. */
10725 static bool
10726 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10728 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10729 || (TARGET_E500_DOUBLE && mode == DFmode));
10732 /* Nonzero if we can use a floating-point register to pass this arg. */
10733 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10734 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10735 && (CUM)->fregno <= FP_ARG_MAX_REG \
10736 && TARGET_HARD_FLOAT && TARGET_FPRS)
10738 /* Nonzero if we can use an AltiVec register to pass this arg. */
10739 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10740 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10741 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10742 && TARGET_ALTIVEC_ABI \
10743 && (NAMED))
10745 /* Walk down the type tree of TYPE counting consecutive base elements.
10746 If *MODEP is VOIDmode, then set it to the first valid floating point
10747 or vector type. If a non-floating point or vector type is found, or
10748 if a floating point or vector type that doesn't match a non-VOIDmode
10749 *MODEP is found, then return -1, otherwise return the count in the
10750 sub-tree. */
10752 static int
10753 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10755 machine_mode mode;
10756 HOST_WIDE_INT size;
10758 switch (TREE_CODE (type))
10760 case REAL_TYPE:
10761 mode = TYPE_MODE (type);
10762 if (!SCALAR_FLOAT_MODE_P (mode))
10763 return -1;
10765 if (*modep == VOIDmode)
10766 *modep = mode;
10768 if (*modep == mode)
10769 return 1;
10771 break;
10773 case COMPLEX_TYPE:
10774 mode = TYPE_MODE (TREE_TYPE (type));
10775 if (!SCALAR_FLOAT_MODE_P (mode))
10776 return -1;
10778 if (*modep == VOIDmode)
10779 *modep = mode;
10781 if (*modep == mode)
10782 return 2;
10784 break;
10786 case VECTOR_TYPE:
10787 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10788 return -1;
10790 /* Use V4SImode as representative of all 128-bit vector types. */
10791 size = int_size_in_bytes (type);
10792 switch (size)
10794 case 16:
10795 mode = V4SImode;
10796 break;
10797 default:
10798 return -1;
10801 if (*modep == VOIDmode)
10802 *modep = mode;
10804 /* Vector modes are considered to be opaque: two vectors are
10805 equivalent for the purposes of being homogeneous aggregates
10806 if they are the same size. */
10807 if (*modep == mode)
10808 return 1;
10810 break;
10812 case ARRAY_TYPE:
10814 int count;
10815 tree index = TYPE_DOMAIN (type);
10817 /* Can't handle incomplete types or sizes that are not
10818 fixed. */
10819 if (!COMPLETE_TYPE_P (type)
10820 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10821 return -1;
10823 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10824 if (count == -1
10825 || !index
10826 || !TYPE_MAX_VALUE (index)
10827 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10828 || !TYPE_MIN_VALUE (index)
10829 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10830 || count < 0)
10831 return -1;
10833 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10834 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10836 /* There must be no padding. */
10837 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10838 return -1;
10840 return count;
10843 case RECORD_TYPE:
10845 int count = 0;
10846 int sub_count;
10847 tree field;
10849 /* Can't handle incomplete types or sizes that are not
10850 fixed. */
10851 if (!COMPLETE_TYPE_P (type)
10852 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10853 return -1;
10855 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10857 if (TREE_CODE (field) != FIELD_DECL)
10858 continue;
10860 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10861 if (sub_count < 0)
10862 return -1;
10863 count += sub_count;
10866 /* There must be no padding. */
10867 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10868 return -1;
10870 return count;
10873 case UNION_TYPE:
10874 case QUAL_UNION_TYPE:
10876 /* These aren't very interesting except in a degenerate case. */
10877 int count = 0;
10878 int sub_count;
10879 tree field;
10881 /* Can't handle incomplete types or sizes that are not
10882 fixed. */
10883 if (!COMPLETE_TYPE_P (type)
10884 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10885 return -1;
10887 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10889 if (TREE_CODE (field) != FIELD_DECL)
10890 continue;
10892 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10893 if (sub_count < 0)
10894 return -1;
10895 count = count > sub_count ? count : sub_count;
10898 /* There must be no padding. */
10899 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10900 return -1;
10902 return count;
10905 default:
10906 break;
10909 return -1;
10912 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10913 float or vector aggregate that shall be passed in FP/vector registers
10914 according to the ELFv2 ABI, return the homogeneous element mode in
10915 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10917 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10919 static bool
10920 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10921 machine_mode *elt_mode,
10922 int *n_elts)
10924 /* Note that we do not accept complex types at the top level as
10925 homogeneous aggregates; these types are handled via the
10926 targetm.calls.split_complex_arg mechanism. Complex types
10927 can be elements of homogeneous aggregates, however. */
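 /* For example, struct { double x, y; } is discovered as two DFmode
    elements under the ELFv2 ABI.  */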
10928 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10930 machine_mode field_mode = VOIDmode;
10931 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10933 if (field_count > 0)
10935 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10936 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
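 /* Scalar floats occupy one 8-byte FPR per 8 bytes, rounded up;
    each vector element takes a single register.  */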
10938 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10939 up to AGGR_ARG_NUM_REG registers. */
10940 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10942 if (elt_mode)
10943 *elt_mode = field_mode;
10944 if (n_elts)
10945 *n_elts = field_count;
10946 return true;
10951 if (elt_mode)
10952 *elt_mode = mode;
10953 if (n_elts)
10954 *n_elts = 1;
10955 return false;
10958 /* Return a nonzero value to say to return the function value in
10959 memory, just as large structures are always returned. TYPE will be
10960 the data type of the value, and FNTYPE will be the type of the
10961 function doing the returning, or @code{NULL} for libcalls.
10963 The AIX ABI for the RS/6000 specifies that all structures are
10964 returned in memory. The Darwin ABI does the same.
10966 For the Darwin 64 Bit ABI, a function result can be returned in
10967 registers or in memory, depending on the size of the return data
10968 type. If it is returned in registers, the value occupies the same
10969 registers as it would if it were the first and only function
10970 argument. Otherwise, the function places its result in memory at
10971 the location pointed to by GPR3.
10973 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10974 but a draft put them in memory, and GCC used to implement the draft
10975 instead of the final standard. Therefore, aix_struct_return
10976 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10977 compatibility can change DRAFT_V4_STRUCT_RET to override the
10978 default, and -m switches get the final word. See
10979 rs6000_option_override_internal for more details.
10981 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10982 long double support is enabled. These values are returned in memory.
10984 int_size_in_bytes returns -1 for variable-size objects, which always
10985 go in memory. The cast to unsigned makes -1 > 8. */
10987 static bool
10988 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10990 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10991 if (TARGET_MACHO
10992 && rs6000_darwin64_abi
10993 && TREE_CODE (type) == RECORD_TYPE
10994 && int_size_in_bytes (type) > 0)
10996 CUMULATIVE_ARGS valcum;
10997 rtx valret;
10999 valcum.words = 0;
11000 valcum.fregno = FP_ARG_MIN_REG;
11001 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11002 /* Do a trial code generation as if this were going to be passed
11003 as an argument; if any part goes in memory, we return NULL. */
11004 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11005 if (valret)
11006 return false;
11007 /* Otherwise fall through to more conventional ABI rules. */
11010 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11011 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11012 NULL, NULL))
11013 return false;
11015 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
11016 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11017 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11018 return false;
11020 if (AGGREGATE_TYPE_P (type)
11021 && (aix_struct_return
11022 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11023 return true;
11025 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11026 modes only exist for GCC vector types if -maltivec. */
11027 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11028 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11029 return false;
11031 /* Return synthetic vectors in memory. */
11032 if (TREE_CODE (type) == VECTOR_TYPE
11033 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11035 static bool warned_for_return_big_vectors = false;
11036 if (!warned_for_return_big_vectors)
11038 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11039 "non-standard ABI extension with no compatibility guarantee");
11040 warned_for_return_big_vectors = true;
11042 return true;
11045 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11046 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11047 return true;
11049 return false;
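/* Standalone illustration, not part of this file: invented return
   types and where the logic above is expected to place them under
   ELFv2 on a 64-bit target.  */

struct small { long a, b; };       /* 16 bytes, not homogeneous   */
struct big   { long a, b, c; };    /* 24 bytes, not homogeneous   */
struct hfa3  { double x, y, z; };  /* homogeneous float aggregate */

struct small ret_small (void);  /* <= 16 bytes: returned in r3/r4    */
struct hfa3  ret_hfa3  (void);  /* homogeneous: returned in f1..f3   */
struct big   ret_big   (void);  /* neither: returned in memory, with
                                   the buffer address passed in r3   */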
11052 /* Specify whether values returned in registers should be at the most
11053 significant end of a register. We want aggregates returned by
11054 value to match the way aggregates are passed to functions. */
11056 static bool
11057 rs6000_return_in_msb (const_tree valtype)
11059 return (DEFAULT_ABI == ABI_ELFv2
11060 && BYTES_BIG_ENDIAN
11061 && AGGREGATE_TYPE_P (valtype)
11062 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11065 #ifdef HAVE_AS_GNU_ATTRIBUTE
11066 /* Return TRUE if a call to function FNDECL may be one that
11067 potentially affects the function calling ABI of the object file. */
11069 static bool
11070 call_ABI_of_interest (tree fndecl)
11072 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11074 struct cgraph_node *c_node;
11076 /* Libcalls are always interesting. */
11077 if (fndecl == NULL_TREE)
11078 return true;
11080 /* Any call to an external function is interesting. */
11081 if (DECL_EXTERNAL (fndecl))
11082 return true;
11084 /* Interesting functions that we are emitting in this object file. */
11085 c_node = cgraph_node::get (fndecl);
11086 c_node = c_node->ultimate_alias_target ();
11087 return !c_node->only_called_directly_p ();
11089 return false;
11091 #endif
11093 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11094 for a call to a function whose data type is FNTYPE.
11095 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11097 For incoming args we set the prototype argument count high so that
11098 we never return a PARALLEL. */
11100 void
11101 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11102 rtx libname ATTRIBUTE_UNUSED, int incoming,
11103 int libcall, int n_named_args,
11104 tree fndecl ATTRIBUTE_UNUSED,
11105 machine_mode return_mode ATTRIBUTE_UNUSED)
11107 static CUMULATIVE_ARGS zero_cumulative;
11109 *cum = zero_cumulative;
11110 cum->words = 0;
11111 cum->fregno = FP_ARG_MIN_REG;
11112 cum->vregno = ALTIVEC_ARG_MIN_REG;
11113 cum->prototype = (fntype && prototype_p (fntype));
11114 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11115 ? CALL_LIBCALL : CALL_NORMAL);
11116 cum->sysv_gregno = GP_ARG_MIN_REG;
11117 cum->stdarg = stdarg_p (fntype);
11118 cum->libcall = libcall;
11120 cum->nargs_prototype = 0;
11121 if (incoming || cum->prototype)
11122 cum->nargs_prototype = n_named_args;
11124 /* Check for a longcall attribute. */
11125 if ((!fntype && rs6000_default_long_calls)
11126 || (fntype
11127 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11128 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11129 cum->call_cookie |= CALL_LONG;
11131 if (TARGET_DEBUG_ARG)
11133 fprintf (stderr, "\ninit_cumulative_args:");
11134 if (fntype)
11136 tree ret_type = TREE_TYPE (fntype);
11137 fprintf (stderr, " ret code = %s,",
11138 get_tree_code_name (TREE_CODE (ret_type)));
11141 if (cum->call_cookie & CALL_LONG)
11142 fprintf (stderr, " longcall,");
11144 fprintf (stderr, " proto = %d, nargs = %d\n",
11145 cum->prototype, cum->nargs_prototype);
11148 #ifdef HAVE_AS_GNU_ATTRIBUTE
11149 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11151 cum->escapes = call_ABI_of_interest (fndecl);
11152 if (cum->escapes)
11154 tree return_type;
11156 if (fntype)
11158 return_type = TREE_TYPE (fntype);
11159 return_mode = TYPE_MODE (return_type);
11161 else
11162 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11164 if (return_type != NULL)
11166 if (TREE_CODE (return_type) == RECORD_TYPE
11167 && TYPE_TRANSPARENT_AGGR (return_type))
11169 return_type = TREE_TYPE (first_field (return_type));
11170 return_mode = TYPE_MODE (return_type);
11172 if (AGGREGATE_TYPE_P (return_type)
11173 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11174 <= 8))
11175 rs6000_returns_struct = true;
11177 if (SCALAR_FLOAT_MODE_P (return_mode))
11179 rs6000_passes_float = true;
11180 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11181 && (FLOAT128_IBM_P (return_mode)
11182 || FLOAT128_IEEE_P (return_mode)
11183 || (return_type != NULL
11184 && (TYPE_MAIN_VARIANT (return_type)
11185 == long_double_type_node))))
11186 rs6000_passes_long_double = true;
11188 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11189 || SPE_VECTOR_MODE (return_mode))
11190 rs6000_passes_vector = true;
11193 #endif
11195 if (fntype
11196 && !TARGET_ALTIVEC
11197 && TARGET_ALTIVEC_ABI
11198 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11200 error ("cannot return value in vector register because"
11201 " altivec instructions are disabled, use -maltivec"
11202 " to enable them");
11206 /* The mode the ABI uses for a word. This is not the same as word_mode
11207 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11209 static machine_mode
11210 rs6000_abi_word_mode (void)
11212 return TARGET_32BIT ? SImode : DImode;
11215 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11216 static char *
11217 rs6000_offload_options (void)
11219 if (TARGET_64BIT)
11220 return xstrdup ("-foffload-abi=lp64");
11221 else
11222 return xstrdup ("-foffload-abi=ilp32");
11225 /* On rs6000, function arguments are promoted, as are function return
11226 values. */
11228 static machine_mode
11229 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11230 machine_mode mode,
11231 int *punsignedp ATTRIBUTE_UNUSED,
11232 const_tree, int)
11234 PROMOTE_MODE (mode, *punsignedp, type);
11236 return mode;
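/* Standalone illustration, not part of this file: effect of the
   promotion above on an invented sub-word parameter.  */

short add1 (short x);  /* X arrives sign-extended to a full register
                          (SImode for -m32, DImode for -m64), and the
                          return value is widened the same way.  */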
11239 /* Return true if TYPE must be passed on the stack and not in registers. */
11241 static bool
11242 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11244 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11245 return must_pass_in_stack_var_size (mode, type);
11246 else
11247 return must_pass_in_stack_var_size_or_pad (mode, type);
11250 static inline bool
11251 is_complex_IBM_long_double (machine_mode mode)
11253 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11256 /* Whether ABI_V4 passes MODE args to a function in floating point
11257 registers. */
11259 static bool
11260 abi_v4_pass_in_fpr (machine_mode mode)
11262 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11263 return false;
11264 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11265 return true;
11266 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11267 return true;
11268 /* ABI_V4 passes complex IBM long double in 8 gprs.
11269 Stupid, but we can't change the ABI now. */
11270 if (is_complex_IBM_long_double (mode))
11271 return false;
11272 if (FLOAT128_2REG_P (mode))
11273 return true;
11274 if (DECIMAL_FLOAT_MODE_P (mode))
11275 return true;
11276 return false;
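/* Standalone illustration, not part of this file: invented 32-bit
   SVR4 hard-float parameters and the route the predicate above
   chooses for each.  */

void f (float a,                  /* SFmode: FPR, given TARGET_SINGLE_FLOAT */
        double b,                 /* DFmode: FPR, given TARGET_DOUBLE_FLOAT */
        long double c,            /* IBM double-double, FLOAT128_2REG_P:
                                     an FPR pair                            */
        _Complex long double z);  /* the documented exception: 8 GPRs       */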
11279 /* If defined, a C expression which determines whether, and in which
11280 direction, to pad out an argument with extra space. The value
11281 should be of type `enum direction': either `upward' to pad above
11282 the argument, `downward' to pad below, or `none' to inhibit
11283 padding.
11285 For the AIX ABI structs are always stored left shifted in their
11286 argument slot. */
11288 enum direction
11289 function_arg_padding (machine_mode mode, const_tree type)
11291 #ifndef AGGREGATE_PADDING_FIXED
11292 #define AGGREGATE_PADDING_FIXED 0
11293 #endif
11294 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11295 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11296 #endif
11298 if (!AGGREGATE_PADDING_FIXED)
11300 /* GCC used to pass structures of the same size as integer types as
11301 if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
11302 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11303 passed padded downward, except that -mstrict-align further
11304 muddied the water in that multi-component structures of 2 and 4
11305 bytes in size were passed padded upward.
11307 The following arranges for best compatibility with previous
11308 versions of gcc, but removes the -mstrict-align dependency. */
11309 if (BYTES_BIG_ENDIAN)
11311 HOST_WIDE_INT size = 0;
11313 if (mode == BLKmode)
11315 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11316 size = int_size_in_bytes (type);
11318 else
11319 size = GET_MODE_SIZE (mode);
11321 if (size == 1 || size == 2 || size == 4)
11322 return downward;
11324 return upward;
11327 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11329 if (type != 0 && AGGREGATE_TYPE_P (type))
11330 return upward;
11333 /* Fall back to the default. */
11334 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
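/* Standalone illustration, not part of this file: invented aggregates
   and where the big-endian logic above places them within their
   argument slot.  */

struct two { short s; };     /* size 2: padded downward, i.e.
                                right-justified, as if it were an
                                integer in a register             */
struct six { char c[6]; };   /* size 6: padded upward, i.e.
                                left-justified, per the AIX rule
                                that structs are left-shifted     */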
11337 /* If defined, a C expression that gives the alignment boundary, in bits,
11338 of an argument with the specified mode and type. If it is not defined,
11339 PARM_BOUNDARY is used for all arguments.
11341 V.4 wants long longs and doubles to be double word aligned. Just
11342 testing the mode size is a boneheaded way to do this as it means
11343 that other types such as complex int are also double word aligned.
11344 However, we're stuck with this because changing the ABI might break
11345 existing library interfaces.
11347 Doubleword align SPE vectors.
11348 Quadword align Altivec/VSX vectors.
11349 Quadword align large synthetic vector types. */
11351 static unsigned int
11352 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11354 machine_mode elt_mode;
11355 int n_elts;
11357 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11359 if (DEFAULT_ABI == ABI_V4
11360 && (GET_MODE_SIZE (mode) == 8
11361 || (TARGET_HARD_FLOAT
11362 && TARGET_FPRS
11363 && !is_complex_IBM_long_double (mode)
11364 && FLOAT128_2REG_P (mode))))
11365 return 64;
11366 else if (FLOAT128_VECTOR_P (mode))
11367 return 128;
11368 else if (SPE_VECTOR_MODE (mode)
11369 || (type && TREE_CODE (type) == VECTOR_TYPE
11370 && int_size_in_bytes (type) >= 8
11371 && int_size_in_bytes (type) < 16))
11372 return 64;
11373 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11374 || (type && TREE_CODE (type) == VECTOR_TYPE
11375 && int_size_in_bytes (type) >= 16))
11376 return 128;
11378 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11379 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11380 -mcompat-align-parm is used. */
11381 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11382 || DEFAULT_ABI == ABI_ELFv2)
11383 && type && TYPE_ALIGN (type) > 64)
11385 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11386 or homogeneous float/vector aggregates here. We already handled
11387 vector aggregates above, but still need to check for float here. */
11388 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11389 && !SCALAR_FLOAT_MODE_P (elt_mode));
11391 /* We used to check for BLKmode instead of the above aggregate type
11392 check. Warn when this results in any difference to the ABI. */
11393 if (aggregate_p != (mode == BLKmode))
11395 static bool warned;
11396 if (!warned && warn_psabi)
11398 warned = true;
11399 inform (input_location,
11400 "the ABI of passing aggregates with %d-byte alignment"
11401 " has changed in GCC 5",
11402 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11406 if (aggregate_p)
11407 return 128;
11410 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11411 implement the "aggregate type" check as a BLKmode check here; this
11412 means certain aggregate types are in fact not aligned. */
11413 if (TARGET_MACHO && rs6000_darwin64_abi
11414 && mode == BLKmode
11415 && type && TYPE_ALIGN (type) > 64)
11416 return 128;
11418 return PARM_BOUNDARY;
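/* Standalone illustration, not part of this file: invented parameters
   and the boundary the hook above is expected to report.  */

struct over { int x; } __attribute__ ((aligned (32)));

void g (long long a,     /* V.4: doubleword (64-bit) boundary         */
        struct over b);  /* TYPE_ALIGN > 64 bits: quadword (128-bit)
                            under ELFv2, and under AIX without
                            -mcompat-align-parm; this is the case the
                            GCC 5 -Wpsabi note above covers            */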
11421 /* The offset in words to the start of the parameter save area. */
11423 static unsigned int
11424 rs6000_parm_offset (void)
11426 return (DEFAULT_ABI == ABI_V4 ? 2
11427 : DEFAULT_ABI == ABI_ELFv2 ? 4
11428 : 6);
11431 /* For a function parm of MODE and TYPE, return the starting word in
11432 the parameter area. NWORDS of the parameter area are already used. */
11434 static unsigned int
11435 rs6000_parm_start (machine_mode mode, const_tree type,
11436 unsigned int nwords)
11438 unsigned int align;
11440 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11441 return nwords + (-(rs6000_parm_offset () + nwords) & align);
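/* Standalone illustration, not part of this file: the alignment
   arithmetic above replayed for a quadword-aligned argument under
   ELFv2, where rs6000_parm_offset () is 4 words and PARM_BOUNDARY
   is 64.  */

unsigned int
parm_start_example (unsigned int nwords)
{
  unsigned int offset = 4;            /* ELFv2 parameter-area offset */
  unsigned int align = 128 / 64 - 1;  /* 16-byte boundary -> mask 1  */
  return nwords + (-(offset + nwords) & align);
}

/* parm_start_example (3) == 4: one padding word is skipped so that
   word 4 + offset 4 = word 8 is 16-byte aligned relative to the
   stack pointer.  */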
11444 /* Compute the size (in words) of a function argument. */
11446 static unsigned long
11447 rs6000_arg_size (machine_mode mode, const_tree type)
11449 unsigned long size;
11451 if (mode != BLKmode)
11452 size = GET_MODE_SIZE (mode);
11453 else
11454 size = int_size_in_bytes (type);
11456 if (TARGET_32BIT)
11457 return (size + 3) >> 2;
11458 else
11459 return (size + 7) >> 3;
11462 /* Use this to flush pending int fields. */
11464 static void
11465 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11466 HOST_WIDE_INT bitpos, int final)
11468 unsigned int startbit, endbit;
11469 int intregs, intoffset;
11470 machine_mode mode;
11472 /* Handle the situations where a float is taking up the first half
11473 of the GPR, and the other half is empty (typically due to
11474 alignment restrictions). We can detect this by an 8-byte-aligned
11475 int field, or by seeing that this is the final flush for this
11476 argument. Count the word and continue on. */
11477 if (cum->floats_in_gpr == 1
11478 && (cum->intoffset % 64 == 0
11479 || (cum->intoffset == -1 && final)))
11481 cum->words++;
11482 cum->floats_in_gpr = 0;
11485 if (cum->intoffset == -1)
11486 return;
11488 intoffset = cum->intoffset;
11489 cum->intoffset = -1;
11490 cum->floats_in_gpr = 0;
11492 if (intoffset % BITS_PER_WORD != 0)
11494 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11495 MODE_INT, 0);
11496 if (mode == BLKmode)
11498 /* We couldn't find an appropriate mode, which happens,
11499 e.g., in packed structs when there are 3 bytes to load.
11500 Move intoffset back to the beginning of the word in this
11501 case. */
11502 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11506 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11507 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11508 intregs = (endbit - startbit) / BITS_PER_WORD;
11509 cum->words += intregs;
11510 /* words should be unsigned. */
11511 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11513 int pad = (endbit/BITS_PER_WORD) - cum->words;
11514 cum->words += pad;
11518 /* The darwin64 ABI calls for us to recurse down through structs,
11519 looking for elements passed in registers. Unfortunately, we have
11520 to track int register count here also because of misalignments
11521 in powerpc alignment mode. */
11523 static void
11524 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11525 const_tree type,
11526 HOST_WIDE_INT startbitpos)
11528 tree f;
11530 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11531 if (TREE_CODE (f) == FIELD_DECL)
11533 HOST_WIDE_INT bitpos = startbitpos;
11534 tree ftype = TREE_TYPE (f);
11535 machine_mode mode;
11536 if (ftype == error_mark_node)
11537 continue;
11538 mode = TYPE_MODE (ftype);
11540 if (DECL_SIZE (f) != 0
11541 && tree_fits_uhwi_p (bit_position (f)))
11542 bitpos += int_bit_position (f);
11544 /* ??? FIXME: else assume zero offset. */
11546 if (TREE_CODE (ftype) == RECORD_TYPE)
11547 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11548 else if (USE_FP_FOR_ARG_P (cum, mode))
11550 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11551 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11552 cum->fregno += n_fpregs;
11553 /* Single-precision floats present a special problem for
11554 us, because they are smaller than an 8-byte GPR, and so
11555 the structure-packing rules combined with the standard
11556 varargs behavior mean that we want to pack float/float
11557 and float/int combinations into a single register's
11558 space. This is complicated by the arg advance flushing,
11559 which works on arbitrarily large groups of int-type
11560 fields. */
11561 if (mode == SFmode)
11563 if (cum->floats_in_gpr == 1)
11565 /* Two floats in a word; count the word and reset
11566 the float count. */
11567 cum->words++;
11568 cum->floats_in_gpr = 0;
11570 else if (bitpos % 64 == 0)
11572 /* A float at the beginning of an 8-byte word;
11573 count it and put off adjusting cum->words until
11574 we see if an arg advance flush is going to do it
11575 for us. */
11576 cum->floats_in_gpr++;
11578 else
11580 /* The float is at the end of a word, preceded
11581 by integer fields, so the arg advance flush
11582 just above has already set cum->words and
11583 everything is taken care of. */
11586 else
11587 cum->words += n_fpregs;
11589 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11591 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11592 cum->vregno++;
11593 cum->words += 2;
11595 else if (cum->intoffset == -1)
11596 cum->intoffset = bitpos;
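/* Standalone illustration, not part of this file: the float-packing
   cases above, for invented Darwin64 structs.  */

struct ff { float a; float b; };  /* two SFmode fields share one GPR
                                     word: the first bumps
                                     floats_in_gpr, the second counts
                                     the word                         */
struct fi { float a; int b; };    /* float in the leading half, int in
                                     the trailing half: the arg
                                     advance flush counts the word    */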
11600 /* Check for an item that needs to be considered specially under the
11601 Darwin 64-bit ABI: record types whose mode is BLKmode or whose size
11602 is exactly 8 bytes. */
11603 static int
11604 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11606 return rs6000_darwin64_abi
11607 && ((mode == BLKmode
11608 && TREE_CODE (type) == RECORD_TYPE
11609 && int_size_in_bytes (type) > 0)
11610 || (type && TREE_CODE (type) == RECORD_TYPE
11611 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11614 /* Update the data in CUM to advance over an argument
11615 of mode MODE and data type TYPE.
11616 (TYPE is null for libcalls where that information may not be available.)
11618 Note that for args passed by reference, function_arg will be called
11619 with MODE and TYPE set to that of the pointer to the arg, not the arg
11620 itself. */
11622 static void
11623 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11624 const_tree type, bool named, int depth)
11626 machine_mode elt_mode;
11627 int n_elts;
11629 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11631 /* Only tick off an argument if we're not recursing. */
11632 if (depth == 0)
11633 cum->nargs_prototype--;
11635 #ifdef HAVE_AS_GNU_ATTRIBUTE
11636 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11637 && cum->escapes)
11639 if (SCALAR_FLOAT_MODE_P (mode))
11641 rs6000_passes_float = true;
11642 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11643 && (FLOAT128_IBM_P (mode)
11644 || FLOAT128_IEEE_P (mode)
11645 || (type != NULL
11646 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11647 rs6000_passes_long_double = true;
11649 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11650 || (SPE_VECTOR_MODE (mode)
11651 && !cum->stdarg
11652 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11653 rs6000_passes_vector = true;
11655 #endif
11657 if (TARGET_ALTIVEC_ABI
11658 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11659 || (type && TREE_CODE (type) == VECTOR_TYPE
11660 && int_size_in_bytes (type) == 16)))
11662 bool stack = false;
11664 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11666 cum->vregno += n_elts;
11668 if (!TARGET_ALTIVEC)
11669 error ("cannot pass argument in vector register because"
11670 " altivec instructions are disabled, use -maltivec"
11671 " to enable them");
11673 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11674 even if it is going to be passed in a vector register.
11675 Darwin does the same for variable-argument functions. */
11676 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11677 && TARGET_64BIT)
11678 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11679 stack = true;
11681 else
11682 stack = true;
11684 if (stack)
11686 int align;
11688 /* Vector parameters must be 16-byte aligned. In 32-bit
11689 mode this means we need to take into account the offset
11690 to the parameter save area. In 64-bit mode, they just
11691 have to start on an even word, since the parameter save
11692 area is 16-byte aligned. */
11693 if (TARGET_32BIT)
11694 align = -(rs6000_parm_offset () + cum->words) & 3;
11695 else
11696 align = cum->words & 1;
11697 cum->words += align + rs6000_arg_size (mode, type);
11699 if (TARGET_DEBUG_ARG)
11701 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11702 cum->words, align);
11703 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11704 cum->nargs_prototype, cum->prototype,
11705 GET_MODE_NAME (mode));
11709 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11710 && !cum->stdarg
11711 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11712 cum->sysv_gregno++;
11714 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11716 int size = int_size_in_bytes (type);
11717 /* Variable sized types have size == -1 and are
11718 treated as if consisting entirely of ints.
11719 Pad to 16 byte boundary if needed. */
11720 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11721 && (cum->words % 2) != 0)
11722 cum->words++;
11723 /* For varargs, we can just go up by the size of the struct. */
11724 if (!named)
11725 cum->words += (size + 7) / 8;
11726 else
11728 /* It is tempting to say int register count just goes up by
11729 sizeof(type)/8, but this is wrong in a case such as
11730 { int; double; int; } [powerpc alignment]. We have to
11731 grovel through the fields for these too. */
11732 cum->intoffset = 0;
11733 cum->floats_in_gpr = 0;
11734 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11735 rs6000_darwin64_record_arg_advance_flush (cum,
11736 size * BITS_PER_UNIT, 1);
11738 if (TARGET_DEBUG_ARG)
11740 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11741 cum->words, TYPE_ALIGN (type), size);
11742 fprintf (stderr,
11743 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11744 cum->nargs_prototype, cum->prototype,
11745 GET_MODE_NAME (mode));
11748 else if (DEFAULT_ABI == ABI_V4)
11750 if (abi_v4_pass_in_fpr (mode))
11752 /* _Decimal128 must use an even/odd register pair. This assumes
11753 that the register number is odd when fregno is odd. */
11754 if (mode == TDmode && (cum->fregno % 2) == 1)
11755 cum->fregno++;
11757 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11758 <= FP_ARG_V4_MAX_REG)
11759 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11760 else
11762 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11763 if (mode == DFmode || FLOAT128_IBM_P (mode)
11764 || mode == DDmode || mode == TDmode)
11765 cum->words += cum->words & 1;
11766 cum->words += rs6000_arg_size (mode, type);
11769 else
11771 int n_words = rs6000_arg_size (mode, type);
11772 int gregno = cum->sysv_gregno;
11774 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11775 (r7,r8) or (r9,r10), as is any other 2-word item such
11776 as complex int, due to a historical mistake. */
11777 if (n_words == 2)
11778 gregno += (1 - gregno) & 1;
11780 /* Multi-reg args are not split between registers and stack. */
11781 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11783 /* Long long and SPE vectors are aligned on the stack.
11784 So are other 2 word items such as complex int due to
11785 a historical mistake. */
11786 if (n_words == 2)
11787 cum->words += cum->words & 1;
11788 cum->words += n_words;
11791 /* Note: we keep accumulating gregno even after we've started
11792 spilling to the stack; this tells expand_builtin_saveregs
11793 that spilling has started. */
11794 cum->sysv_gregno = gregno + n_words;
11797 if (TARGET_DEBUG_ARG)
11799 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11800 cum->words, cum->fregno);
11801 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11802 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11803 fprintf (stderr, "mode = %4s, named = %d\n",
11804 GET_MODE_NAME (mode), named);
11807 else
11809 int n_words = rs6000_arg_size (mode, type);
11810 int start_words = cum->words;
11811 int align_words = rs6000_parm_start (mode, type, start_words);
11813 cum->words = align_words + n_words;
11815 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11817 /* _Decimal128 must be passed in an even/odd float register pair.
11818 This assumes that the register number is odd when fregno is
11819 odd. */
11820 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11821 cum->fregno++;
11822 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11825 if (TARGET_DEBUG_ARG)
11827 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11828 cum->words, cum->fregno);
11829 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11830 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11831 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11832 named, align_words - start_words, depth);
11837 static void
11838 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11839 const_tree type, bool named)
11841 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11845 static rtx
11846 spe_build_register_parallel (machine_mode mode, int gregno)
11848 rtx r1, r3, r5, r7;
11850 switch (mode)
11852 case DFmode:
11853 r1 = gen_rtx_REG (DImode, gregno);
11854 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11855 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11857 case DCmode:
11858 case TFmode:
11859 r1 = gen_rtx_REG (DImode, gregno);
11860 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11861 r3 = gen_rtx_REG (DImode, gregno + 2);
11862 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11863 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11865 case TCmode:
11866 r1 = gen_rtx_REG (DImode, gregno);
11867 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11868 r3 = gen_rtx_REG (DImode, gregno + 2);
11869 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11870 r5 = gen_rtx_REG (DImode, gregno + 4);
11871 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11872 r7 = gen_rtx_REG (DImode, gregno + 6);
11873 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11874 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11876 default:
11877 gcc_unreachable ();
11881 /* Determine where to put a SIMD argument on the SPE. */
11882 static rtx
11883 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11884 const_tree type)
11886 int gregno = cum->sysv_gregno;
11888 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11889 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11890 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11891 || mode == DCmode || mode == TCmode))
11893 int n_words = rs6000_arg_size (mode, type);
11895 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11896 if (mode == DFmode)
11897 gregno += (1 - gregno) & 1;
11899 /* Multi-reg args are not split between registers and stack. */
11900 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11901 return NULL_RTX;
11903 return spe_build_register_parallel (mode, gregno);
11905 if (cum->stdarg)
11907 int n_words = rs6000_arg_size (mode, type);
11909 /* SPE vectors are put in odd registers. */
11910 if (n_words == 2 && (gregno & 1) == 0)
11911 gregno += 1;
11913 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11915 rtx r1, r2;
11916 machine_mode m = SImode;
11918 r1 = gen_rtx_REG (m, gregno);
11919 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11920 r2 = gen_rtx_REG (m, gregno + 1);
11921 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11922 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11924 else
11925 return NULL_RTX;
11927 else
11929 if (gregno <= GP_ARG_MAX_REG)
11930 return gen_rtx_REG (mode, gregno);
11931 else
11932 return NULL_RTX;
11936 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11937 structure between cum->intoffset and bitpos to integer registers. */
11939 static void
11940 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11941 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11943 machine_mode mode;
11944 unsigned int regno;
11945 unsigned int startbit, endbit;
11946 int this_regno, intregs, intoffset;
11947 rtx reg;
11949 if (cum->intoffset == -1)
11950 return;
11952 intoffset = cum->intoffset;
11953 cum->intoffset = -1;
11955 /* If this is the trailing part of a word, try to load only that
11956 much into the register. Otherwise load the whole register. Note
11957 that in the latter case we may pick up unwanted bits. It's not a
11958 problem at the moment but we may wish to revisit this. */
11960 if (intoffset % BITS_PER_WORD != 0)
11962 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11963 MODE_INT, 0);
11964 if (mode == BLKmode)
11966 /* We couldn't find an appropriate mode, which happens,
11967 e.g., in packed structs when there are 3 bytes to load.
11968 Move intoffset back to the beginning of the word in this
11969 case. */
11970 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11971 mode = word_mode;
11974 else
11975 mode = word_mode;
11977 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11978 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11979 intregs = (endbit - startbit) / BITS_PER_WORD;
11980 this_regno = cum->words + intoffset / BITS_PER_WORD;
11982 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11983 cum->use_stack = 1;
11985 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11986 if (intregs <= 0)
11987 return;
11989 intoffset /= BITS_PER_UNIT;
11992 regno = GP_ARG_MIN_REG + this_regno;
11993 reg = gen_rtx_REG (mode, regno);
11994 rvec[(*k)++] =
11995 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11997 this_regno += 1;
11998 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11999 mode = word_mode;
12000 intregs -= 1;
12002 while (intregs > 0);
12005 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
12007 static void
12008 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12009 HOST_WIDE_INT startbitpos, rtx rvec[],
12010 int *k)
12012 tree f;
12014 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12015 if (TREE_CODE (f) == FIELD_DECL)
12017 HOST_WIDE_INT bitpos = startbitpos;
12018 tree ftype = TREE_TYPE (f);
12019 machine_mode mode;
12020 if (ftype == error_mark_node)
12021 continue;
12022 mode = TYPE_MODE (ftype);
12024 if (DECL_SIZE (f) != 0
12025 && tree_fits_uhwi_p (bit_position (f)))
12026 bitpos += int_bit_position (f);
12028 /* ??? FIXME: else assume zero offset. */
12030 if (TREE_CODE (ftype) == RECORD_TYPE)
12031 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12032 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12034 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12035 #if 0
12036 switch (mode)
12038 case SCmode: mode = SFmode; break;
12039 case DCmode: mode = DFmode; break;
12040 case TCmode: mode = TFmode; break;
12041 default: break;
12043 #endif
12044 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12045 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12047 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12048 && (mode == TFmode || mode == TDmode));
12049 /* Long double or _Decimal128 split over regs and memory. */
12050 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12051 cum->use_stack=1;
12053 rvec[(*k)++]
12054 = gen_rtx_EXPR_LIST (VOIDmode,
12055 gen_rtx_REG (mode, cum->fregno++),
12056 GEN_INT (bitpos / BITS_PER_UNIT));
12057 if (FLOAT128_2REG_P (mode))
12058 cum->fregno++;
12060 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12062 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12063 rvec[(*k)++]
12064 = gen_rtx_EXPR_LIST (VOIDmode,
12065 gen_rtx_REG (mode, cum->vregno++),
12066 GEN_INT (bitpos / BITS_PER_UNIT));
12068 else if (cum->intoffset == -1)
12069 cum->intoffset = bitpos;
12073 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12074 the register(s) to be used for each field and subfield of a struct
12075 being passed by value, along with the offset of where the
12076 register's value may be found in the block. FP fields go in FP
12077 registers, vector fields go in vector registers, and everything
12078 else goes in int registers, packed as in memory.
12080 This code is also used for function return values. RETVAL indicates
12081 whether this is the case.
12083 Much of this is taken from the SPARC V9 port, which has a similar
12084 calling convention. */
12086 static rtx
12087 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12088 bool named, bool retval)
12090 rtx rvec[FIRST_PSEUDO_REGISTER];
12091 int k = 1, kbase = 1;
12092 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12093 /* This is a copy; modifications are not visible to our caller. */
12094 CUMULATIVE_ARGS copy_cum = *orig_cum;
12095 CUMULATIVE_ARGS *cum = &copy_cum;
12097 /* Pad to 16 byte boundary if needed. */
12098 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12099 && (cum->words % 2) != 0)
12100 cum->words++;
12102 cum->intoffset = 0;
12103 cum->use_stack = 0;
12104 cum->named = named;
12106 /* Put entries into rvec[] for individual FP and vector fields, and
12107 for the chunks of memory that go in int regs. Note we start at
12108 element 1; 0 is reserved for an indication of using memory, and
12109 may or may not be filled in below. */
12110 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12111 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12113 /* If any part of the struct went on the stack put all of it there.
12114 This hack is because the generic code for
12115 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12116 parts of the struct are not at the beginning. */
12117 if (cum->use_stack)
12119 if (retval)
12120 return NULL_RTX; /* doesn't go in registers at all */
12121 kbase = 0;
12122 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12124 if (k > 1 || cum->use_stack)
12125 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12126 else
12127 return NULL_RTX;
12130 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12132 static rtx
12133 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12134 int align_words)
12136 int n_units;
12137 int i, k;
12138 rtx rvec[GP_ARG_NUM_REG + 1];
12140 if (align_words >= GP_ARG_NUM_REG)
12141 return NULL_RTX;
12143 n_units = rs6000_arg_size (mode, type);
12145 /* Optimize the simple case where the arg fits in one gpr, except in
12146 the case of BLKmode due to assign_parms assuming that registers are
12147 BITS_PER_WORD wide. */
12148 if (n_units == 0
12149 || (n_units == 1 && mode != BLKmode))
12150 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12152 k = 0;
12153 if (align_words + n_units > GP_ARG_NUM_REG)
12154 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12155 using a magic NULL_RTX component.
12156 This is not strictly correct. Only some of the arg belongs in
12157 memory, not all of it. However, the normal scheme using
12158 function_arg_partial_nregs can result in unusual subregs, eg.
12159 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12160 store the whole arg to memory is often more efficient than code
12161 to store pieces, and we know that space is available in the right
12162 place for the whole arg. */
12163 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12165 i = 0;
12168 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12169 rtx off = GEN_INT (i++ * 4);
12170 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12172 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12174 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
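/* Standalone illustration, not part of this file: for a hypothetical
   DImode argument whose first word falls in the last GPR slot
   (align_words == 7 with GP_ARG_NUM_REG == 8), the code above builds
   roughly

     (parallel:DI [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   i.e. the first word of the argument is in r10, and the NULL_RTX
   element flags the argument as (partially) in memory.  */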
12177 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12178 but must also be copied into the parameter save area starting at
12179 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12180 to the GPRs and/or memory. Return the number of elements used. */
12182 static int
12183 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12184 int align_words, rtx *rvec)
12186 int k = 0;
12188 if (align_words < GP_ARG_NUM_REG)
12190 int n_words = rs6000_arg_size (mode, type);
12192 if (align_words + n_words > GP_ARG_NUM_REG
12193 || mode == BLKmode
12194 || (TARGET_32BIT && TARGET_POWERPC64))
12196 /* If this is partially on the stack, then we only
12197 include the portion actually in registers here. */
12198 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12199 int i = 0;
12201 if (align_words + n_words > GP_ARG_NUM_REG)
12203 /* Not all of the arg fits in gprs. Say that it goes in memory
12204 too, using a magic NULL_RTX component. Also see comment in
12205 rs6000_mixed_function_arg for why the normal
12206 function_arg_partial_nregs scheme doesn't work in this case. */
12207 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12212 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12213 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12214 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12216 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12218 else
12220 /* The whole arg fits in gprs. */
12221 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12222 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12225 else
12227 /* It's entirely in memory. */
12228 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12231 return k;
12234 /* RVEC is a vector of K components of an argument of mode MODE.
12235 Construct the final function_arg return value from it. */
12237 static rtx
12238 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12240 gcc_assert (k >= 1);
12242 /* Avoid returning a PARALLEL in the trivial cases. */
12243 if (k == 1)
12245 if (XEXP (rvec[0], 0) == NULL_RTX)
12246 return NULL_RTX;
12248 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12249 return XEXP (rvec[0], 0);
12252 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12255 /* Determine where to put an argument to a function.
12256 Value is zero to push the argument on the stack,
12257 or a hard register in which to store the argument.
12259 MODE is the argument's machine mode.
12260 TYPE is the data type of the argument (as a tree).
12261 This is null for libcalls where that information may
12262 not be available.
12263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12264 the preceding args and about the function being called. It is
12265 not modified in this routine.
12266 NAMED is nonzero if this argument is a named parameter
12267 (otherwise it is an extra parameter matching an ellipsis).
12269 On RS/6000 the first eight words of non-FP are normally in registers
12270 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12271 Under V.4, the first 8 FP args are in registers.
12273 If this is floating-point and no prototype is specified, we use
12274 both an FP and integer register (or possibly FP reg and stack). Library
12275 functions (when CALL_LIBCALL is set) always have the proper types for args,
12276 so we can pass the FP value just in one register. emit_library_function
12277 doesn't support PARALLEL anyway.
12279 Note that for args passed by reference, function_arg will be called
12280 with MODE and TYPE set to that of the pointer to the arg, not the arg
12281 itself. */
12283 static rtx
12284 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12285 const_tree type, bool named)
12287 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12288 enum rs6000_abi abi = DEFAULT_ABI;
12289 machine_mode elt_mode;
12290 int n_elts;
12292 /* Return a marker to indicate whether CR1 needs to set or clear the
12293 bit that V.4 uses to say fp args were passed in registers.
12294 Assume that we don't need the marker for software floating point,
12295 or compiler generated library calls. */
12296 if (mode == VOIDmode)
12298 if (abi == ABI_V4
12299 && (cum->call_cookie & CALL_LIBCALL) == 0
12300 && (cum->stdarg
12301 || (cum->nargs_prototype < 0
12302 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12304 /* For the SPE, we need to crxor CR6 always. */
12305 if (TARGET_SPE_ABI)
12306 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12307 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12308 return GEN_INT (cum->call_cookie
12309 | ((cum->fregno == FP_ARG_MIN_REG)
12310 ? CALL_V4_SET_FP_ARGS
12311 : CALL_V4_CLEAR_FP_ARGS));
12314 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12317 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12319 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12321 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12322 if (rslt != NULL_RTX)
12323 return rslt;
12324 /* Else fall through to usual handling. */
12327 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12329 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12330 rtx r, off;
12331 int i, k = 0;
12333 /* Do we also need to pass this argument in the parameter save area?
12334 Library support functions for IEEE 128-bit are assumed to not need the
12335 value passed both in GPRs and in vector registers. */
12336 if (TARGET_64BIT && !cum->prototype
12337 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12339 int align_words = ROUND_UP (cum->words, 2);
12340 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12343 /* Describe where this argument goes in the vector registers. */
12344 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12346 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12347 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12348 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12351 return rs6000_finish_function_arg (mode, rvec, k);
12353 else if (TARGET_ALTIVEC_ABI
12354 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12355 || (type && TREE_CODE (type) == VECTOR_TYPE
12356 && int_size_in_bytes (type) == 16)))
12358 if (named || abi == ABI_V4)
12359 return NULL_RTX;
12360 else
12362 /* Vector parameters to varargs functions under AIX or Darwin
12363 get passed in memory and possibly also in GPRs. */
12364 int align, align_words, n_words;
12365 machine_mode part_mode;
12367 /* Vector parameters must be 16-byte aligned. In 32-bit
12368 mode this means we need to take into account the offset
12369 to the parameter save area. In 64-bit mode, they just
12370 have to start on an even word, since the parameter save
12371 area is 16-byte aligned. */
12372 if (TARGET_32BIT)
12373 align = -(rs6000_parm_offset () + cum->words) & 3;
12374 else
12375 align = cum->words & 1;
12376 align_words = cum->words + align;
12378 /* Out of registers? Memory, then. */
12379 if (align_words >= GP_ARG_NUM_REG)
12380 return NULL_RTX;
12382 if (TARGET_32BIT && TARGET_POWERPC64)
12383 return rs6000_mixed_function_arg (mode, type, align_words);
12385 /* The vector value goes in GPRs. Only the part of the
12386 value in GPRs is reported here. */
12387 part_mode = mode;
12388 n_words = rs6000_arg_size (mode, type);
12389 if (align_words + n_words > GP_ARG_NUM_REG)
12390 /* Fortunately, there are only two possibilities: the value
12391 is either wholly in GPRs or half in GPRs and half not. */
12392 part_mode = DImode;
12394 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12397 else if (TARGET_SPE_ABI && TARGET_SPE
12398 && (SPE_VECTOR_MODE (mode)
12399 || (TARGET_E500_DOUBLE && (mode == DFmode
12400 || mode == DCmode
12401 || mode == TFmode
12402 || mode == TCmode))))
12403 return rs6000_spe_function_arg (cum, mode, type);
12405 else if (abi == ABI_V4)
12407 if (abi_v4_pass_in_fpr (mode))
12409 /* _Decimal128 must use an even/odd register pair. This assumes
12410 that the register number is odd when fregno is odd. */
12411 if (mode == TDmode && (cum->fregno % 2) == 1)
12412 cum->fregno++;
12414 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12415 <= FP_ARG_V4_MAX_REG)
12416 return gen_rtx_REG (mode, cum->fregno);
12417 else
12418 return NULL_RTX;
12420 else
12422 int n_words = rs6000_arg_size (mode, type);
12423 int gregno = cum->sysv_gregno;
12425 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12426 (r7,r8) or (r9,r10), as is any other 2-word item such
12427 as complex int, due to a historical mistake. */
12428 if (n_words == 2)
12429 gregno += (1 - gregno) & 1;
12431 /* Multi-reg args are not split between registers and stack. */
12432 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12433 return NULL_RTX;
12435 if (TARGET_32BIT && TARGET_POWERPC64)
12436 return rs6000_mixed_function_arg (mode, type,
12437 gregno - GP_ARG_MIN_REG);
12438 return gen_rtx_REG (mode, gregno);
12441 else
12443 int align_words = rs6000_parm_start (mode, type, cum->words);
12445 /* _Decimal128 must be passed in an even/odd float register pair.
12446 This assumes that the register number is odd when fregno is odd. */
12447 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12448 cum->fregno++;
12450 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12452 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12453 rtx r, off;
12454 int i, k = 0;
12455 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12456 int fpr_words;
12458 /* Do we also need to pass this argument in the parameter
12459 save area? */
12460 if (type && (cum->nargs_prototype <= 0
12461 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12462 && TARGET_XL_COMPAT
12463 && align_words >= GP_ARG_NUM_REG)))
12464 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12466 /* Describe where this argument goes in the fprs. */
12467 for (i = 0; i < n_elts
12468 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12470 /* Check if the argument is split over registers and memory.
12471 This can only ever happen for long double or _Decimal128;
12472 complex types are handled via split_complex_arg. */
12473 machine_mode fmode = elt_mode;
12474 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12476 gcc_assert (FLOAT128_2REG_P (fmode));
12477 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12480 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12481 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12482 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12485 /* If there were not enough FPRs to hold the argument, the rest
12486 usually goes into memory. However, if the current position
12487 is still within the register parameter area, a portion may
12488 actually have to go into GPRs.
12490 Note that it may happen that the portion of the argument
12491 passed in the first "half" of the first GPR was already
12492 passed in the last FPR as well.
12494 For unnamed arguments, we already set up GPRs to cover the
12495 whole argument in rs6000_psave_function_arg, so there is
12496 nothing further to do at this point. */
12497 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12498 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12499 && cum->nargs_prototype > 0)
12501 static bool warned;
12503 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12504 int n_words = rs6000_arg_size (mode, type);
12506 align_words += fpr_words;
12507 n_words -= fpr_words;
12511 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12512 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12513 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12515 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12517 if (!warned && warn_psabi)
12519 warned = true;
12520 inform (input_location,
12521 "the ABI of passing homogeneous float aggregates"
12522 " has changed in GCC 5");
12526 return rs6000_finish_function_arg (mode, rvec, k);
12528 else if (align_words < GP_ARG_NUM_REG)
12530 if (TARGET_32BIT && TARGET_POWERPC64)
12531 return rs6000_mixed_function_arg (mode, type, align_words);
12533 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12535 else
12536 return NULL_RTX;
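/* Standalone illustration, not part of this file: an invented ELFv2
   prototype exercising the FPR-exhaustion path above, with 13 FP
   argument registers (f1..f13).  */

struct four_dbls { double d[4]; };

void callee (double a1, double a2, double a3, double a4, double a5,
             double a6, double a7, double a8, double a9, double a10,
             /* a1..a10 occupy f1..f10.  */
             struct four_dbls h);
             /* h.d[0..2] go in f11..f13; h.d[3] has run out of FPRs
                and, since the aggregate starts beyond the first eight
                doublewords, lands in the parameter save area.  */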
12540 /* For an arg passed partly in registers and partly in memory, this is
12541 the number of bytes passed in registers. For args passed entirely in
12542 registers or entirely in memory, zero. When an arg is described by a
12543 PARALLEL, perhaps using more than one register type, this function
12544 returns the number of bytes used by the first element of the PARALLEL. */
12546 static int
12547 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12548 tree type, bool named)
12550 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12551 bool passed_in_gprs = true;
12552 int ret = 0;
12553 int align_words;
12554 machine_mode elt_mode;
12555 int n_elts;
12557 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12559 if (DEFAULT_ABI == ABI_V4)
12560 return 0;
12562 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12564 /* If we are passing this arg in the fixed parameter save area (gprs or
12565 memory) as well as VRs, we do not use the partial bytes mechanism;
12566 instead, rs6000_function_arg will return a PARALLEL including a memory
12567 element as necessary. Library support functions for IEEE 128-bit are
12568 assumed to not need the value passed both in GPRs and in vector
12569 registers. */
12570 if (TARGET_64BIT && !cum->prototype
12571 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12572 return 0;
12574 /* Otherwise, we pass in VRs only. Check for partial copies. */
12575 passed_in_gprs = false;
12576 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12577 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12580 /* In this complicated case we just disable the partial_nregs code. */
12581 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12582 return 0;
12584 align_words = rs6000_parm_start (mode, type, cum->words);
12586 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12588 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12590 /* If we are passing this arg in the fixed parameter save area
12591 (gprs or memory) as well as FPRs, we do not use the partial
12592 bytes mechanism; instead, rs6000_function_arg will return a
12593 PARALLEL including a memory element as necessary. */
12594 if (type
12595 && (cum->nargs_prototype <= 0
12596 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12597 && TARGET_XL_COMPAT
12598 && align_words >= GP_ARG_NUM_REG)))
12599 return 0;
12601 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12602 passed_in_gprs = false;
12603 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12605 /* Compute number of bytes / words passed in FPRs. If there
12606 is still space available in the register parameter area
12607 *after* that amount, a part of the argument will be passed
12608 in GPRs. In that case, the total amount passed in any
12609 registers is equal to the amount that would have been passed
12610 in GPRs if everything were passed there, so we fall back to
12611 the GPR code below to compute the appropriate value. */
12612 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12613 * MIN (8, GET_MODE_SIZE (elt_mode)));
12614 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12616 if (align_words + fpr_words < GP_ARG_NUM_REG)
12617 passed_in_gprs = true;
12618 else
12619 ret = fpr;
12623 if (passed_in_gprs
12624 && align_words < GP_ARG_NUM_REG
12625 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12626 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12628 if (ret != 0 && TARGET_DEBUG_ARG)
12629 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12631 return ret;
12634 /* A C expression that indicates when an argument must be passed by
12635 reference. If nonzero for an argument, a copy of that argument is
12636 made in memory and a pointer to the argument is passed instead of
12637 the argument itself. The pointer is passed in whatever way is
12638 appropriate for passing a pointer to that type.
12640 Under V.4, aggregates and long double are passed by reference.
12642 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12643 reference unless the AltiVec vector extension ABI is in force.
12645 As an extension to all ABIs, variable sized types are passed by
12646 reference. */
12648 static bool
12649 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12650 machine_mode mode, const_tree type,
12651 bool named ATTRIBUTE_UNUSED)
12653 if (!type)
12654 return 0;
12656 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12657 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12659 if (TARGET_DEBUG_ARG)
12660 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12661 return 1;
12664 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12666 if (TARGET_DEBUG_ARG)
12667 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12668 return 1;
12671 if (int_size_in_bytes (type) < 0)
12673 if (TARGET_DEBUG_ARG)
12674 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12675 return 1;
12678 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12679 modes only exist for GCC vector types if -maltivec. */
12680 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12682 if (TARGET_DEBUG_ARG)
12683 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12684 return 1;
12687 /* Pass synthetic vectors in memory. */
12688 if (TREE_CODE (type) == VECTOR_TYPE
12689 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12691 static bool warned_for_pass_big_vectors = false;
12692 if (TARGET_DEBUG_ARG)
12693 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12694 if (!warned_for_pass_big_vectors)
12696 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12697 "non-standard ABI extension with no compatibility guarantee");
12698 warned_for_pass_big_vectors = true;
12700 return 1;
12703 return 0;
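/* Standalone illustration, not part of this file: invented 32-bit
   SVR4 parameters that the predicate above forces to pass by
   reference, i.e. the caller makes a copy and passes its address.  */

struct agg { int a, b, c; };
typedef float v8sf __attribute__ ((vector_size (32)));

void callee32 (struct agg s,  /* V.4: all aggregates by reference      */
               v8sf w);       /* 32-byte synthetic vector: by reference,
                                 with the -Wpsabi warning above         */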
12706 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12707 already processed. Return true if the parameter must be passed
12708 (fully or partially) on the stack. */
12710 static bool
12711 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12713 machine_mode mode;
12714 int unsignedp;
12715 rtx entry_parm;
12717 /* Catch errors. */
12718 if (type == NULL || type == error_mark_node)
12719 return true;
12721 /* Handle types with no storage requirement. */
12722 if (TYPE_MODE (type) == VOIDmode)
12723 return false;
12725 /* Handle complex types. */
12726 if (TREE_CODE (type) == COMPLEX_TYPE)
12727 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12728 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12730 /* Handle transparent aggregates. */
12731 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12732 && TYPE_TRANSPARENT_AGGR (type))
12733 type = TREE_TYPE (first_field (type));
12735 /* See if this arg was passed by invisible reference. */
12736 if (pass_by_reference (get_cumulative_args (args_so_far),
12737 TYPE_MODE (type), type, true))
12738 type = build_pointer_type (type);
12740 /* Find mode as it is passed by the ABI. */
12741 unsignedp = TYPE_UNSIGNED (type);
12742 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12744 /* If we must pass in stack, we need a stack. */
12745 if (rs6000_must_pass_in_stack (mode, type))
12746 return true;
12748 /* If there is no incoming register, we need a stack. */
12749 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12750 if (entry_parm == NULL)
12751 return true;
12753 /* Likewise if we need to pass both in registers and on the stack. */
12754 if (GET_CODE (entry_parm) == PARALLEL
12755 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12756 return true;
12758 /* Also true if we're partially in registers and partially not. */
12759 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12760 return true;
12762 /* Update info on where next arg arrives in registers. */
12763 rs6000_function_arg_advance (args_so_far, mode, type, true);
12764 return false;
12767 /* Return true if FUN has no prototype, has a variable argument
12768 list, or passes any parameter in memory. */
12770 static bool
12771 rs6000_function_parms_need_stack (tree fun, bool incoming)
12773 tree fntype, result;
12774 CUMULATIVE_ARGS args_so_far_v;
12775 cumulative_args_t args_so_far;
12777 if (!fun)
12778 /* Must be a libcall, all of which only use reg parms. */
12779 return false;
12781 fntype = fun;
12782 if (!TYPE_P (fun))
12783 fntype = TREE_TYPE (fun);
12785 /* Varargs functions need the parameter save area. */
12786 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12787 return true;
12789 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12790 args_so_far = pack_cumulative_args (&args_so_far_v);
12792 /* When incoming, we will have been passed the function decl.
12793 It is necessary to use the decl to handle K&R style functions,
12794 where TYPE_ARG_TYPES may not be available. */
12795 if (incoming)
12797 gcc_assert (DECL_P (fun));
12798 result = DECL_RESULT (fun);
12800 else
12801 result = TREE_TYPE (fntype);
12803 if (result && aggregate_value_p (result, fntype))
12805 if (!TYPE_P (result))
12806 result = TREE_TYPE (result);
12807 result = build_pointer_type (result);
12808 rs6000_parm_needs_stack (args_so_far, result);
12811 if (incoming)
12813 tree parm;
12815 for (parm = DECL_ARGUMENTS (fun);
12816 parm && parm != void_list_node;
12817 parm = TREE_CHAIN (parm))
12818 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12819 return true;
12821 else
12823 function_args_iterator args_iter;
12824 tree arg_type;
12826 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12827 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12828 return true;
12831 return false;
12834 /* Return the size of the REG_PARM_STACK_SPACE area for FUN.  This is
12835 usually a constant depending on the ABI.  However, in the ELFv2 ABI
12836 the register parameter area is optional when calling a function that
12837 has a prototype in scope, has no variable argument list, and passes
12838 all parameters in registers. */
12840 static int
12841 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12843 int reg_parm_stack_space;
12845 switch (DEFAULT_ABI)
12847 default:
12848 reg_parm_stack_space = 0;
12849 break;
12851 case ABI_AIX:
12852 case ABI_DARWIN:
12853 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12854 break;
12856 case ABI_ELFv2:
12857 /* ??? Recomputing this every time is a bit expensive. Is there
12858 a place to cache this information? */
12859 if (rs6000_function_parms_need_stack (fun, incoming))
12860 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12861 else
12862 reg_parm_stack_space = 0;
12863 break;
12866 return reg_parm_stack_space;
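/* Illustrative sketch with hypothetical prototypes: under ELFv2,

     int add (int a, int b);    prototyped, all args in regs: 0 bytes
     int sum (int n, ...);      varargs: full 64 bytes (TARGET_64BIT)

   while AIX and Darwin always reserve the full 64 (or 32) bytes.  */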
12869 static void
12870 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12872 int i;
12873 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12875 if (nregs == 0)
12876 return;
12878 for (i = 0; i < nregs; i++)
12880 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12881 if (reload_completed)
12883 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12884 tem = NULL_RTX;
12885 else
12886 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12887 i * GET_MODE_SIZE (reg_mode));
12889 else
12890 tem = replace_equiv_address (tem, XEXP (tem, 0));
12892 gcc_assert (tem);
12894 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12898 /* Perform any actions needed for a function that is receiving a
12899 variable number of arguments.
12901 CUM is as above.
12903 MODE and TYPE are the mode and type of the current parameter.
12905 PRETEND_SIZE is a variable that should be set to the amount of stack
12906 that must be pushed by the prolog to pretend that our caller pushed
12907 it.
12909 Normally, this macro will push all remaining incoming registers on the
12910 stack and set PRETEND_SIZE to the length of the registers pushed. */
12912 static void
12913 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12914 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12915 int no_rtl)
12917 CUMULATIVE_ARGS next_cum;
12918 int reg_size = TARGET_32BIT ? 4 : 8;
12919 rtx save_area = NULL_RTX, mem;
12920 int first_reg_offset;
12921 alias_set_type set;
12923 /* Skip the last named argument. */
12924 next_cum = *get_cumulative_args (cum);
12925 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12927 if (DEFAULT_ABI == ABI_V4)
12929 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12931 if (! no_rtl)
12933 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12934 HOST_WIDE_INT offset = 0;
12936 /* Try to optimize the size of the varargs save area.
12937 The ABI requires that ap.reg_save_area is doubleword
12938 aligned, but we don't need to allocate space for all
12939 the bytes, only those to which we actually will save
12940 anything. */
12941 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12942 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12943 if (TARGET_HARD_FLOAT && TARGET_FPRS
12944 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12945 && cfun->va_list_fpr_size)
12947 if (gpr_reg_num)
12948 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12949 * UNITS_PER_FP_WORD;
12950 if (cfun->va_list_fpr_size
12951 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12952 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12953 else
12954 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12955 * UNITS_PER_FP_WORD;
12957 if (gpr_reg_num)
12959 offset = -((first_reg_offset * reg_size) & ~7);
12960 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12962 gpr_reg_num = cfun->va_list_gpr_size;
12963 if (reg_size == 4 && (first_reg_offset & 1))
12964 gpr_reg_num++;
12966 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12968 else if (fpr_size)
12969 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12970 * UNITS_PER_FP_WORD
12971 - (int) (GP_ARG_NUM_REG * reg_size);
12973 if (gpr_size + fpr_size)
12975 rtx reg_save_area
12976 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12977 gcc_assert (GET_CODE (reg_save_area) == MEM);
12978 reg_save_area = XEXP (reg_save_area, 0);
12979 if (GET_CODE (reg_save_area) == PLUS)
12981 gcc_assert (XEXP (reg_save_area, 0)
12982 == virtual_stack_vars_rtx);
12983 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12984 offset += INTVAL (XEXP (reg_save_area, 1));
12986 else
12987 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12990 cfun->machine->varargs_save_offset = offset;
12991 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12994 else
12996 first_reg_offset = next_cum.words;
12997 save_area = crtl->args.internal_arg_pointer;
12999 if (targetm.calls.must_pass_in_stack (mode, type))
13000 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13003 set = get_varargs_alias_set ();
13004 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13005 && cfun->va_list_gpr_size)
13007 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13009 if (va_list_gpr_counter_field)
13010 /* V4 va_list_gpr_size counts number of registers needed. */
13011 n_gpr = cfun->va_list_gpr_size;
13012 else
13013 /* char * va_list instead counts number of bytes needed. */
13014 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13016 if (nregs > n_gpr)
13017 nregs = n_gpr;
13019 mem = gen_rtx_MEM (BLKmode,
13020 plus_constant (Pmode, save_area,
13021 first_reg_offset * reg_size));
13022 MEM_NOTRAP_P (mem) = 1;
13023 set_mem_alias_set (mem, set);
13024 set_mem_align (mem, BITS_PER_WORD);
13026 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13027 nregs);
13030 /* Save FP registers if needed. */
13031 if (DEFAULT_ABI == ABI_V4
13032 && TARGET_HARD_FLOAT && TARGET_FPRS
13033 && ! no_rtl
13034 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13035 && cfun->va_list_fpr_size)
13037 int fregno = next_cum.fregno, nregs;
13038 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13039 rtx lab = gen_label_rtx ();
13040 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13041 * UNITS_PER_FP_WORD);
13043 emit_jump_insn
13044 (gen_rtx_SET (pc_rtx,
13045 gen_rtx_IF_THEN_ELSE (VOIDmode,
13046 gen_rtx_NE (VOIDmode, cr1,
13047 const0_rtx),
13048 gen_rtx_LABEL_REF (VOIDmode, lab),
13049 pc_rtx)));
13051 for (nregs = 0;
13052 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13053 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13055 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13056 ? DFmode : SFmode,
13057 plus_constant (Pmode, save_area, off));
13058 MEM_NOTRAP_P (mem) = 1;
13059 set_mem_alias_set (mem, set);
13060 set_mem_align (mem, GET_MODE_ALIGNMENT (
13061 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13062 ? DFmode : SFmode));
13063 emit_move_insn (mem, gen_rtx_REG (
13064 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13065 ? DFmode : SFmode, fregno));
13068 emit_label (lab);
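/* Illustrative sketch: for a hypothetical V.4 varargs function

     int logfmt (const char *fmt, ...);

   the named FMT consumes r3, so the code above dumps the remaining
   candidate GPRs r4..r10 into the save area and, guarded by a test of
   the CR bit the SysV convention has the caller set when FP arguments
   are live, spills f1..f8 into the FPR part.  */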
13072 /* Create the va_list data type. */
13074 static tree
13075 rs6000_build_builtin_va_list (void)
13077 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13079 /* For AIX, prefer 'char *' because that's what the system
13080 header files like. */
13081 if (DEFAULT_ABI != ABI_V4)
13082 return build_pointer_type (char_type_node);
13084 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13085 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13086 get_identifier ("__va_list_tag"), record);
13088 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13089 unsigned_char_type_node);
13090 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13091 unsigned_char_type_node);
13092 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13093 every user file. */
13094 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13095 get_identifier ("reserved"), short_unsigned_type_node);
13096 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13097 get_identifier ("overflow_arg_area"),
13098 ptr_type_node);
13099 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13100 get_identifier ("reg_save_area"),
13101 ptr_type_node);
13103 va_list_gpr_counter_field = f_gpr;
13104 va_list_fpr_counter_field = f_fpr;
13106 DECL_FIELD_CONTEXT (f_gpr) = record;
13107 DECL_FIELD_CONTEXT (f_fpr) = record;
13108 DECL_FIELD_CONTEXT (f_res) = record;
13109 DECL_FIELD_CONTEXT (f_ovf) = record;
13110 DECL_FIELD_CONTEXT (f_sav) = record;
13112 TYPE_STUB_DECL (record) = type_decl;
13113 TYPE_NAME (record) = type_decl;
13114 TYPE_FIELDS (record) = f_gpr;
13115 DECL_CHAIN (f_gpr) = f_fpr;
13116 DECL_CHAIN (f_fpr) = f_res;
13117 DECL_CHAIN (f_res) = f_ovf;
13118 DECL_CHAIN (f_ovf) = f_sav;
13120 layout_type (record);
13122 /* The correct type is an array type of one element. */
13123 return build_array_type (record, build_index_type (size_zero_node));
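/* Illustrative sketch: the record built above matches the well-known
   V.4 va_list layout (field names as in the FIELD_DECLs):

     typedef struct __va_list_tag {
       unsigned char gpr;           next GPR index, 0 .. 8
       unsigned char fpr;           next FPR index, 0 .. 8
       unsigned short reserved;     the named padding, see above
       void *overflow_arg_area;     arguments that spilled to memory
       void *reg_save_area;         dumped r3..r10 and f1..f8
     } __va_list_tag;

   with va_list itself being a one-element array of this record.  */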
13126 /* Implement va_start. */
13128 static void
13129 rs6000_va_start (tree valist, rtx nextarg)
13131 HOST_WIDE_INT words, n_gpr, n_fpr;
13132 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13133 tree gpr, fpr, ovf, sav, t;
13135 /* Only SVR4 needs something special. */
13136 if (DEFAULT_ABI != ABI_V4)
13138 std_expand_builtin_va_start (valist, nextarg);
13139 return;
13142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13143 f_fpr = DECL_CHAIN (f_gpr);
13144 f_res = DECL_CHAIN (f_fpr);
13145 f_ovf = DECL_CHAIN (f_res);
13146 f_sav = DECL_CHAIN (f_ovf);
13148 valist = build_simple_mem_ref (valist);
13149 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13150 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13151 f_fpr, NULL_TREE);
13152 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13153 f_ovf, NULL_TREE);
13154 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13155 f_sav, NULL_TREE);
13157 /* Count number of gp and fp argument registers used. */
13158 words = crtl->args.info.words;
13159 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13160 GP_ARG_NUM_REG);
13161 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13162 FP_ARG_NUM_REG);
13164 if (TARGET_DEBUG_ARG)
13165 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13166 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13167 words, n_gpr, n_fpr);
13169 if (cfun->va_list_gpr_size)
13171 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13172 build_int_cst (NULL_TREE, n_gpr));
13173 TREE_SIDE_EFFECTS (t) = 1;
13174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13177 if (cfun->va_list_fpr_size)
13179 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13180 build_int_cst (NULL_TREE, n_fpr));
13181 TREE_SIDE_EFFECTS (t) = 1;
13182 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13184 #ifdef HAVE_AS_GNU_ATTRIBUTE
13185 if (call_ABI_of_interest (cfun->decl))
13186 rs6000_passes_float = true;
13187 #endif
13190 /* Find the overflow area. */
13191 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13192 if (words != 0)
13193 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13194 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13195 TREE_SIDE_EFFECTS (t) = 1;
13196 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13198 /* If there were no va_arg invocations, don't set up the register
13199 save area. */
13200 if (!cfun->va_list_gpr_size
13201 && !cfun->va_list_fpr_size
13202 && n_gpr < GP_ARG_NUM_REG
13203 && n_fpr < FP_ARG_V4_MAX_REG)
13204 return;
13206 /* Find the register save area. */
13207 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13208 if (cfun->machine->varargs_save_offset)
13209 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13210 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13211 TREE_SIDE_EFFECTS (t) = 1;
13212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13215 /* Implement va_arg. */
13217 static tree
13218 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13219 gimple_seq *post_p)
13221 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13222 tree gpr, fpr, ovf, sav, reg, t, u;
13223 int size, rsize, n_reg, sav_ofs, sav_scale;
13224 tree lab_false, lab_over, addr;
13225 int align;
13226 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13227 int regalign = 0;
13228 gimple *stmt;
13230 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13232 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13233 return build_va_arg_indirect_ref (t);
13236 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13237 earlier version of gcc, with the property that it always applied alignment
13238 adjustments to the va-args (even for zero-sized types). The cheapest way
13239 to deal with this is to replicate the effect of the part of
13240 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13241 of relevance.
13242 We don't need to check for pass-by-reference because of the test above.
13243 We can return a simplified answer, since we know there's no offset to add. */
13245 if (((TARGET_MACHO
13246 && rs6000_darwin64_abi)
13247 || DEFAULT_ABI == ABI_ELFv2
13248 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13249 && integer_zerop (TYPE_SIZE (type)))
13251 unsigned HOST_WIDE_INT align, boundary;
13252 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13253 align = PARM_BOUNDARY / BITS_PER_UNIT;
13254 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13255 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13256 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13257 boundary /= BITS_PER_UNIT;
13258 if (boundary > align)
13260 tree t;
13261 /* This updates arg ptr by the amount that would be necessary
13262 to align the zero-sized (but not zero-alignment) item. */
13263 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13264 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13265 gimplify_and_add (t, pre_p);
13267 t = fold_convert (sizetype, valist_tmp);
13268 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13269 fold_convert (TREE_TYPE (valist),
13270 fold_build2 (BIT_AND_EXPR, sizetype, t,
13271 size_int (-boundary))));
13272 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13273 gimplify_and_add (t, pre_p);
13275 /* Since it is zero-sized there's no increment for the item itself. */
13276 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13277 return build_va_arg_indirect_ref (valist_tmp);
13280 if (DEFAULT_ABI != ABI_V4)
13282 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13284 tree elem_type = TREE_TYPE (type);
13285 machine_mode elem_mode = TYPE_MODE (elem_type);
13286 int elem_size = GET_MODE_SIZE (elem_mode);
13288 if (elem_size < UNITS_PER_WORD)
13290 tree real_part, imag_part;
13291 gimple_seq post = NULL;
13293 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13294 &post);
13295 /* Copy the value into a temporary, lest the formal temporary
13296 be reused out from under us. */
13297 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13298 gimple_seq_add_seq (pre_p, post);
13300 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13301 post_p);
13303 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13307 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13310 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13311 f_fpr = DECL_CHAIN (f_gpr);
13312 f_res = DECL_CHAIN (f_fpr);
13313 f_ovf = DECL_CHAIN (f_res);
13314 f_sav = DECL_CHAIN (f_ovf);
13316 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13317 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13318 f_fpr, NULL_TREE);
13319 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13320 f_ovf, NULL_TREE);
13321 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13322 f_sav, NULL_TREE);
13324 size = int_size_in_bytes (type);
13325 rsize = (size + 3) / 4;
13326 align = 1;
13328 machine_mode mode = TYPE_MODE (type);
13329 if (abi_v4_pass_in_fpr (mode))
13331 /* FP args go in FP registers, if present. */
13332 reg = fpr;
13333 n_reg = (size + 7) / 8;
13334 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13335 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13336 if (mode != SFmode && mode != SDmode)
13337 align = 8;
13339 else
13341 /* Otherwise into GP registers. */
13342 reg = gpr;
13343 n_reg = rsize;
13344 sav_ofs = 0;
13345 sav_scale = 4;
13346 if (n_reg == 2)
13347 align = 8;
13350 /* Pull the value out of the saved registers.... */
13352 lab_over = NULL;
13353 addr = create_tmp_var (ptr_type_node, "addr");
13355 /* AltiVec vectors never go in the GPR/FPR save area when -mabi=altivec. */
13356 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13357 align = 16;
13358 else
13360 lab_false = create_artificial_label (input_location);
13361 lab_over = create_artificial_label (input_location);
13363 /* Long long and SPE vectors are aligned in the registers.
13364 As is any other 2-gpr item, such as complex int, due to a
13365 historical mistake. */
13366 u = reg;
13367 if (n_reg == 2 && reg == gpr)
13369 regalign = 1;
13370 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13371 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13372 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13373 unshare_expr (reg), u);
13375 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13376 reg number is 0 for f1, so we want to make it odd. */
13377 else if (reg == fpr && mode == TDmode)
13379 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13380 build_int_cst (TREE_TYPE (reg), 1));
13381 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13384 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13385 t = build2 (GE_EXPR, boolean_type_node, u, t);
13386 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13387 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13388 gimplify_and_add (t, pre_p);
13390 t = sav;
13391 if (sav_ofs)
13392 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13394 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13395 build_int_cst (TREE_TYPE (reg), n_reg));
13396 u = fold_convert (sizetype, u);
13397 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13398 t = fold_build_pointer_plus (t, u);
13400 /* _Decimal32 varargs are located in the second word of the 64-bit
13401 FP register for 32-bit binaries. */
13402 if (TARGET_32BIT
13403 && TARGET_HARD_FLOAT && TARGET_FPRS
13404 && mode == SDmode)
13405 t = fold_build_pointer_plus_hwi (t, size);
13407 gimplify_assign (addr, t, pre_p);
13409 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13411 stmt = gimple_build_label (lab_false);
13412 gimple_seq_add_stmt (pre_p, stmt);
13414 if ((n_reg == 2 && !regalign) || n_reg > 2)
13416 /* Ensure that we don't find any more args in regs.
13417 Alignment has been taken care of for the special cases. */
13418 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13422 /* ... otherwise out of the overflow area. */
13424 /* Care for on-stack alignment if needed. */
13425 t = ovf;
13426 if (align != 1)
13428 t = fold_build_pointer_plus_hwi (t, align - 1);
13429 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13430 build_int_cst (TREE_TYPE (t), -align));
13432 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13434 gimplify_assign (unshare_expr (addr), t, pre_p);
13436 t = fold_build_pointer_plus_hwi (t, size);
13437 gimplify_assign (unshare_expr (ovf), t, pre_p);
13439 if (lab_over)
13441 stmt = gimple_build_label (lab_over);
13442 gimple_seq_add_stmt (pre_p, stmt);
13445 if (STRICT_ALIGNMENT
13446 && (TYPE_ALIGN (type)
13447 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13449 /* The value (of type complex double, for example) may not be
13450 aligned in memory in the saved registers, so copy via a
13451 temporary. (This is the same code as used for SPARC.) */
13452 tree tmp = create_tmp_var (type, "va_arg_tmp");
13453 tree dest_addr = build_fold_addr_expr (tmp);
13455 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13456 3, dest_addr, addr, size_int (rsize * 4));
13458 gimplify_and_add (copy, pre_p);
13459 addr = dest_addr;
13462 addr = fold_convert (ptrtype, addr);
13463 return build_va_arg_indirect_ref (addr);
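/* Illustrative sketch of the code emitted for a V.4
   va_arg (ap, double), written as its rough C equivalent
   (hypothetical helper align8; n_reg == 1, sav_scale == 8):

     if (ap->fpr >= 8) goto overflow;        out of FP registers
     addr = ap->reg_save_area
            + 8 * 4                          sav_ofs: skip the GPR block
            + ap->fpr++ * 8;                 sav_scale
     goto done;
   overflow:
     addr = align8 (ap->overflow_arg_area);  align == 8
     ap->overflow_arg_area = addr + 8;       step past the double
   done:
     result = *(double *) addr;  */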
13466 /* Builtins. */
13468 static void
13469 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13471 tree t;
13472 unsigned classify = rs6000_builtin_info[(int)code].attr;
13473 const char *attr_string = "";
13475 gcc_assert (name != NULL);
13476 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13478 if (rs6000_builtin_decls[(int)code])
13479 fatal_error (input_location,
13480 "internal error: builtin function %s already processed", name);
13482 rs6000_builtin_decls[(int)code] = t =
13483 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13485 /* Set any special attributes. */
13486 if ((classify & RS6000_BTC_CONST) != 0)
13488 /* const function, function only depends on the inputs. */
13489 TREE_READONLY (t) = 1;
13490 TREE_NOTHROW (t) = 1;
13491 attr_string = ", const";
13493 else if ((classify & RS6000_BTC_PURE) != 0)
13495 /* pure function, function can read global memory, but does not set any
13496 external state. */
13497 DECL_PURE_P (t) = 1;
13498 TREE_NOTHROW (t) = 1;
13499 attr_string = ", pure";
13501 else if ((classify & RS6000_BTC_FP) != 0)
13503 /* Function is a math function. If rounding mode is on, then treat the
13504 function as not reading global memory, but it can have arbitrary side
13505 effects. If it is off, then assume the function is a const function.
13506 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13507 builtin-attribute.def that is used for the math functions. */
13508 TREE_NOTHROW (t) = 1;
13509 if (flag_rounding_math)
13511 DECL_PURE_P (t) = 1;
13512 DECL_IS_NOVOPS (t) = 1;
13513 attr_string = ", fp, pure";
13515 else
13517 TREE_READONLY (t) = 1;
13518 attr_string = ", fp, const";
13521 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13522 gcc_unreachable ();
13524 if (TARGET_DEBUG_BUILTIN)
13525 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13526 (int)code, name, attr_string);
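/* Illustrative summary: the classification above is the MD-builtin
   analogue of ordinary function attributes, roughly

     RS6000_BTC_CONST  ->  __attribute__ ((const, nothrow))
     RS6000_BTC_PURE   ->  __attribute__ ((pure, nothrow))
     RS6000_BTC_FP     ->  const under -fno-rounding-math, otherwise
                           pure + novops so the FP-environment side
                           effects stay ordered

   and any other bit in RS6000_BTC_ATTR_MASK is a programming error.  */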
13529 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13531 #undef RS6000_BUILTIN_0
13532 #undef RS6000_BUILTIN_1
13533 #undef RS6000_BUILTIN_2
13534 #undef RS6000_BUILTIN_3
13535 #undef RS6000_BUILTIN_A
13536 #undef RS6000_BUILTIN_D
13537 #undef RS6000_BUILTIN_E
13538 #undef RS6000_BUILTIN_H
13539 #undef RS6000_BUILTIN_P
13540 #undef RS6000_BUILTIN_Q
13541 #undef RS6000_BUILTIN_S
13542 #undef RS6000_BUILTIN_X
13544 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13545 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13546 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13547 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13548 { MASK, ICODE, NAME, ENUM },
13550 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13551 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13552 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13553 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13554 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13555 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13556 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13557 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13559 static const struct builtin_description bdesc_3arg[] =
13561 #include "rs6000-builtin.def"
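/* Illustrative sketch of the #undef/#define pattern used throughout
   this section (a standard X-macro technique): rs6000-builtin.def
   writes every builtin exactly once as a RS6000_BUILTIN_<class>
   invocation, and each table re-includes the file after defining the
   one class macro it wants as an initializer and the other eleven as
   nothing.  A reduced stand-alone model with a hypothetical two-class
   my-builtins.def:

     #define BUILTIN_1(NAME, CODE) { NAME, CODE },
     #define BUILTIN_2(NAME, CODE)        dropped from this table
     static const struct desc one_arg[] = {
     #include "my-builtins.def"
     };  */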
13564 /* DST operations: void foo (void *, const int, const char). */
13566 #undef RS6000_BUILTIN_0
13567 #undef RS6000_BUILTIN_1
13568 #undef RS6000_BUILTIN_2
13569 #undef RS6000_BUILTIN_3
13570 #undef RS6000_BUILTIN_A
13571 #undef RS6000_BUILTIN_D
13572 #undef RS6000_BUILTIN_E
13573 #undef RS6000_BUILTIN_H
13574 #undef RS6000_BUILTIN_P
13575 #undef RS6000_BUILTIN_Q
13576 #undef RS6000_BUILTIN_S
13577 #undef RS6000_BUILTIN_X
13579 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13580 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13581 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13582 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13583 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13584 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13585 { MASK, ICODE, NAME, ENUM },
13587 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13588 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13589 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13590 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13591 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13592 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13594 static const struct builtin_description bdesc_dst[] =
13596 #include "rs6000-builtin.def"
13599 /* Simple binary operations: VECc = foo (VECa, VECb). */
13601 #undef RS6000_BUILTIN_0
13602 #undef RS6000_BUILTIN_1
13603 #undef RS6000_BUILTIN_2
13604 #undef RS6000_BUILTIN_3
13605 #undef RS6000_BUILTIN_A
13606 #undef RS6000_BUILTIN_D
13607 #undef RS6000_BUILTIN_E
13608 #undef RS6000_BUILTIN_H
13609 #undef RS6000_BUILTIN_P
13610 #undef RS6000_BUILTIN_Q
13611 #undef RS6000_BUILTIN_S
13612 #undef RS6000_BUILTIN_X
13614 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13615 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13616 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13617 { MASK, ICODE, NAME, ENUM },
13619 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13620 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13621 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13622 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13623 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13624 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13625 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13626 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13627 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13629 static const struct builtin_description bdesc_2arg[] =
13631 #include "rs6000-builtin.def"
13634 #undef RS6000_BUILTIN_0
13635 #undef RS6000_BUILTIN_1
13636 #undef RS6000_BUILTIN_2
13637 #undef RS6000_BUILTIN_3
13638 #undef RS6000_BUILTIN_A
13639 #undef RS6000_BUILTIN_D
13640 #undef RS6000_BUILTIN_E
13641 #undef RS6000_BUILTIN_H
13642 #undef RS6000_BUILTIN_P
13643 #undef RS6000_BUILTIN_Q
13644 #undef RS6000_BUILTIN_S
13645 #undef RS6000_BUILTIN_X
13647 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13648 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13649 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13650 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13651 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13652 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13653 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13654 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13655 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13656 { MASK, ICODE, NAME, ENUM },
13658 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13659 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13660 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13662 /* AltiVec predicates. */
13664 static const struct builtin_description bdesc_altivec_preds[] =
13666 #include "rs6000-builtin.def"
13669 /* SPE predicates. */
13670 #undef RS6000_BUILTIN_0
13671 #undef RS6000_BUILTIN_1
13672 #undef RS6000_BUILTIN_2
13673 #undef RS6000_BUILTIN_3
13674 #undef RS6000_BUILTIN_A
13675 #undef RS6000_BUILTIN_D
13676 #undef RS6000_BUILTIN_E
13677 #undef RS6000_BUILTIN_H
13678 #undef RS6000_BUILTIN_P
13679 #undef RS6000_BUILTIN_Q
13680 #undef RS6000_BUILTIN_S
13681 #undef RS6000_BUILTIN_X
13683 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13684 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13685 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13686 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13687 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13688 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13689 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13690 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13691 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13692 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13693 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13694 { MASK, ICODE, NAME, ENUM },
13696 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13698 static const struct builtin_description bdesc_spe_predicates[] =
13700 #include "rs6000-builtin.def"
13703 /* SPE evsel predicates. */
13704 #undef RS6000_BUILTIN_0
13705 #undef RS6000_BUILTIN_1
13706 #undef RS6000_BUILTIN_2
13707 #undef RS6000_BUILTIN_3
13708 #undef RS6000_BUILTIN_A
13709 #undef RS6000_BUILTIN_D
13710 #undef RS6000_BUILTIN_E
13711 #undef RS6000_BUILTIN_H
13712 #undef RS6000_BUILTIN_P
13713 #undef RS6000_BUILTIN_Q
13714 #undef RS6000_BUILTIN_S
13715 #undef RS6000_BUILTIN_X
13717 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13718 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13719 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13720 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13723 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13724 { MASK, ICODE, NAME, ENUM },
13726 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13727 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13728 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13729 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13730 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13732 static const struct builtin_description bdesc_spe_evsel[] =
13734 #include "rs6000-builtin.def"
13737 /* PAIRED predicates. */
13738 #undef RS6000_BUILTIN_0
13739 #undef RS6000_BUILTIN_1
13740 #undef RS6000_BUILTIN_2
13741 #undef RS6000_BUILTIN_3
13742 #undef RS6000_BUILTIN_A
13743 #undef RS6000_BUILTIN_D
13744 #undef RS6000_BUILTIN_E
13745 #undef RS6000_BUILTIN_H
13746 #undef RS6000_BUILTIN_P
13747 #undef RS6000_BUILTIN_Q
13748 #undef RS6000_BUILTIN_S
13749 #undef RS6000_BUILTIN_X
13751 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13752 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13753 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13754 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13755 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13756 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13757 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13758 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13759 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13760 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13761 { MASK, ICODE, NAME, ENUM },
13763 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13764 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13766 static const struct builtin_description bdesc_paired_preds[] =
13768 #include "rs6000-builtin.def"
13771 /* ABS* operations. */
13773 #undef RS6000_BUILTIN_0
13774 #undef RS6000_BUILTIN_1
13775 #undef RS6000_BUILTIN_2
13776 #undef RS6000_BUILTIN_3
13777 #undef RS6000_BUILTIN_A
13778 #undef RS6000_BUILTIN_D
13779 #undef RS6000_BUILTIN_E
13780 #undef RS6000_BUILTIN_H
13781 #undef RS6000_BUILTIN_P
13782 #undef RS6000_BUILTIN_Q
13783 #undef RS6000_BUILTIN_S
13784 #undef RS6000_BUILTIN_X
13786 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13787 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13788 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13789 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13790 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13791 { MASK, ICODE, NAME, ENUM },
13793 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13794 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13795 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13796 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13797 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13798 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13799 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13801 static const struct builtin_description bdesc_abs[] =
13803 #include "rs6000-builtin.def"
13806 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13807 foo (VECa). */
13809 #undef RS6000_BUILTIN_0
13810 #undef RS6000_BUILTIN_1
13811 #undef RS6000_BUILTIN_2
13812 #undef RS6000_BUILTIN_3
13813 #undef RS6000_BUILTIN_A
13814 #undef RS6000_BUILTIN_D
13815 #undef RS6000_BUILTIN_E
13816 #undef RS6000_BUILTIN_H
13817 #undef RS6000_BUILTIN_P
13818 #undef RS6000_BUILTIN_Q
13819 #undef RS6000_BUILTIN_S
13820 #undef RS6000_BUILTIN_X
13822 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13823 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13824 { MASK, ICODE, NAME, ENUM },
13826 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13827 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13828 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13829 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13830 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13831 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13832 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13833 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13834 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13835 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13837 static const struct builtin_description bdesc_1arg[] =
13839 #include "rs6000-builtin.def"
13842 /* Simple no-argument operations: result = __builtin_darn_32 () */
13844 #undef RS6000_BUILTIN_0
13845 #undef RS6000_BUILTIN_1
13846 #undef RS6000_BUILTIN_2
13847 #undef RS6000_BUILTIN_3
13848 #undef RS6000_BUILTIN_A
13849 #undef RS6000_BUILTIN_D
13850 #undef RS6000_BUILTIN_E
13851 #undef RS6000_BUILTIN_H
13852 #undef RS6000_BUILTIN_P
13853 #undef RS6000_BUILTIN_Q
13854 #undef RS6000_BUILTIN_S
13855 #undef RS6000_BUILTIN_X
13857 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13858 { MASK, ICODE, NAME, ENUM },
13860 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13861 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13862 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13863 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13864 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13865 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13866 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13867 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13868 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13869 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13870 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13872 static const struct builtin_description bdesc_0arg[] =
13874 #include "rs6000-builtin.def"
13877 /* HTM builtins. */
13878 #undef RS6000_BUILTIN_0
13879 #undef RS6000_BUILTIN_1
13880 #undef RS6000_BUILTIN_2
13881 #undef RS6000_BUILTIN_3
13882 #undef RS6000_BUILTIN_A
13883 #undef RS6000_BUILTIN_D
13884 #undef RS6000_BUILTIN_E
13885 #undef RS6000_BUILTIN_H
13886 #undef RS6000_BUILTIN_P
13887 #undef RS6000_BUILTIN_Q
13888 #undef RS6000_BUILTIN_S
13889 #undef RS6000_BUILTIN_X
13891 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13892 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13893 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13894 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13895 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13896 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13897 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13898 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13899 { MASK, ICODE, NAME, ENUM },
13901 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13902 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13903 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13904 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13906 static const struct builtin_description bdesc_htm[] =
13908 #include "rs6000-builtin.def"
13911 #undef RS6000_BUILTIN_0
13912 #undef RS6000_BUILTIN_1
13913 #undef RS6000_BUILTIN_2
13914 #undef RS6000_BUILTIN_3
13915 #undef RS6000_BUILTIN_A
13916 #undef RS6000_BUILTIN_D
13917 #undef RS6000_BUILTIN_E
13918 #undef RS6000_BUILTIN_H
13919 #undef RS6000_BUILTIN_P
13920 #undef RS6000_BUILTIN_Q
13921 #undef RS6000_BUILTIN_S
13923 /* Return true if a builtin function is overloaded. */
13924 bool
13925 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13927 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13930 const char *
13931 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13933 return rs6000_builtin_info[(int)fncode].name;
13936 /* Expand an expression EXP that calls a builtin without arguments. */
13937 static rtx
13938 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13940 rtx pat;
13941 machine_mode tmode = insn_data[icode].operand[0].mode;
13943 if (icode == CODE_FOR_nothing)
13944 /* Builtin not supported on this processor. */
13945 return 0;
13947 if (target == 0
13948 || GET_MODE (target) != tmode
13949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13950 target = gen_reg_rtx (tmode);
13952 pat = GEN_FCN (icode) (target);
13953 if (! pat)
13954 return 0;
13955 emit_insn (pat);
13957 return target;
13961 static rtx
13962 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13964 rtx pat;
13965 tree arg0 = CALL_EXPR_ARG (exp, 0);
13966 tree arg1 = CALL_EXPR_ARG (exp, 1);
13967 rtx op0 = expand_normal (arg0);
13968 rtx op1 = expand_normal (arg1);
13969 machine_mode mode0 = insn_data[icode].operand[0].mode;
13970 machine_mode mode1 = insn_data[icode].operand[1].mode;
13972 if (icode == CODE_FOR_nothing)
13973 /* Builtin not supported on this processor. */
13974 return 0;
13976 /* If we got invalid arguments bail out before generating bad rtl. */
13977 if (arg0 == error_mark_node || arg1 == error_mark_node)
13978 return const0_rtx;
13980 if (GET_CODE (op0) != CONST_INT
13981 || INTVAL (op0) > 255
13982 || INTVAL (op0) < 0)
13984 error ("argument 1 must be an 8-bit field value");
13985 return const0_rtx;
13988 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (mode0, op0);
13991 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13992 op1 = copy_to_mode_reg (mode1, op1);
13994 pat = GEN_FCN (icode) (op0, op1);
13995 if (! pat)
13996 return const0_rtx;
13997 emit_insn (pat);
13999 return NULL_RTX;
14002 static rtx
14003 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14005 rtx pat;
14006 tree arg0 = CALL_EXPR_ARG (exp, 0);
14007 rtx op0 = expand_normal (arg0);
14008 machine_mode tmode = insn_data[icode].operand[0].mode;
14009 machine_mode mode0 = insn_data[icode].operand[1].mode;
14011 if (icode == CODE_FOR_nothing)
14012 /* Builtin not supported on this processor. */
14013 return 0;
14015 /* If we got invalid arguments bail out before generating bad rtl. */
14016 if (arg0 == error_mark_node)
14017 return const0_rtx;
14019 if (icode == CODE_FOR_altivec_vspltisb
14020 || icode == CODE_FOR_altivec_vspltish
14021 || icode == CODE_FOR_altivec_vspltisw
14022 || icode == CODE_FOR_spe_evsplatfi
14023 || icode == CODE_FOR_spe_evsplati)
14025 /* Only allow 5-bit *signed* literals. */
14026 if (GET_CODE (op0) != CONST_INT
14027 || INTVAL (op0) > 15
14028 || INTVAL (op0) < -16)
14030 error ("argument 1 must be a 5-bit signed literal");
14031 return const0_rtx;
14035 if (target == 0
14036 || GET_MODE (target) != tmode
14037 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14038 target = gen_reg_rtx (tmode);
14040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14041 op0 = copy_to_mode_reg (mode0, op0);
14043 pat = GEN_FCN (icode) (target, op0);
14044 if (! pat)
14045 return 0;
14046 emit_insn (pat);
14048 return target;
14051 static rtx
14052 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14054 rtx pat, scratch1, scratch2;
14055 tree arg0 = CALL_EXPR_ARG (exp, 0);
14056 rtx op0 = expand_normal (arg0);
14057 machine_mode tmode = insn_data[icode].operand[0].mode;
14058 machine_mode mode0 = insn_data[icode].operand[1].mode;
14060 /* If we have invalid arguments, bail out before generating bad rtl. */
14061 if (arg0 == error_mark_node)
14062 return const0_rtx;
14064 if (target == 0
14065 || GET_MODE (target) != tmode
14066 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14067 target = gen_reg_rtx (tmode);
14069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14070 op0 = copy_to_mode_reg (mode0, op0);
14072 scratch1 = gen_reg_rtx (mode0);
14073 scratch2 = gen_reg_rtx (mode0);
14075 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14076 if (! pat)
14077 return 0;
14078 emit_insn (pat);
14080 return target;
14083 static rtx
14084 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14086 rtx pat;
14087 tree arg0 = CALL_EXPR_ARG (exp, 0);
14088 tree arg1 = CALL_EXPR_ARG (exp, 1);
14089 rtx op0 = expand_normal (arg0);
14090 rtx op1 = expand_normal (arg1);
14091 machine_mode tmode = insn_data[icode].operand[0].mode;
14092 machine_mode mode0 = insn_data[icode].operand[1].mode;
14093 machine_mode mode1 = insn_data[icode].operand[2].mode;
14095 if (icode == CODE_FOR_nothing)
14096 /* Builtin not supported on this processor. */
14097 return 0;
14099 /* If we got invalid arguments bail out before generating bad rtl. */
14100 if (arg0 == error_mark_node || arg1 == error_mark_node)
14101 return const0_rtx;
14103 if (icode == CODE_FOR_altivec_vcfux
14104 || icode == CODE_FOR_altivec_vcfsx
14105 || icode == CODE_FOR_altivec_vctsxs
14106 || icode == CODE_FOR_altivec_vctuxs
14107 || icode == CODE_FOR_altivec_vspltb
14108 || icode == CODE_FOR_altivec_vsplth
14109 || icode == CODE_FOR_altivec_vspltw
14110 || icode == CODE_FOR_spe_evaddiw
14111 || icode == CODE_FOR_spe_evldd
14112 || icode == CODE_FOR_spe_evldh
14113 || icode == CODE_FOR_spe_evldw
14114 || icode == CODE_FOR_spe_evlhhesplat
14115 || icode == CODE_FOR_spe_evlhhossplat
14116 || icode == CODE_FOR_spe_evlhhousplat
14117 || icode == CODE_FOR_spe_evlwhe
14118 || icode == CODE_FOR_spe_evlwhos
14119 || icode == CODE_FOR_spe_evlwhou
14120 || icode == CODE_FOR_spe_evlwhsplat
14121 || icode == CODE_FOR_spe_evlwwsplat
14122 || icode == CODE_FOR_spe_evrlwi
14123 || icode == CODE_FOR_spe_evslwi
14124 || icode == CODE_FOR_spe_evsrwis
14125 || icode == CODE_FOR_spe_evsubifw
14126 || icode == CODE_FOR_spe_evsrwiu)
14128 /* Only allow 5-bit unsigned literals. */
14129 STRIP_NOPS (arg1);
14130 if (TREE_CODE (arg1) != INTEGER_CST
14131 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14133 error ("argument 2 must be a 5-bit unsigned literal");
14134 return const0_rtx;
14137 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14138 || icode == CODE_FOR_dfptstsfi_lt_dd
14139 || icode == CODE_FOR_dfptstsfi_gt_dd
14140 || icode == CODE_FOR_dfptstsfi_unordered_dd
14141 || icode == CODE_FOR_dfptstsfi_eq_td
14142 || icode == CODE_FOR_dfptstsfi_lt_td
14143 || icode == CODE_FOR_dfptstsfi_gt_td
14144 || icode == CODE_FOR_dfptstsfi_unordered_td)
14146 /* Only allow 6-bit unsigned literals. */
14147 STRIP_NOPS (arg0);
14148 if (TREE_CODE (arg0) != INTEGER_CST
14149 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14151 error ("argument 1 must be a 6-bit unsigned literal");
14152 return CONST0_RTX (tmode);
14155 else if (icode == CODE_FOR_xststdcdp
14156 || icode == CODE_FOR_xststdcsp
14157 || icode == CODE_FOR_xvtstdcdp
14158 || icode == CODE_FOR_xvtstdcsp)
14160 /* Only allow 7-bit unsigned literals. */
14161 STRIP_NOPS (arg1);
14162 if (TREE_CODE (arg1) != INTEGER_CST
14163 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14165 error ("argument 2 must be a 7-bit unsigned literal");
14166 return CONST0_RTX (tmode);
14170 if (target == 0
14171 || GET_MODE (target) != tmode
14172 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14173 target = gen_reg_rtx (tmode);
14175 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14176 op0 = copy_to_mode_reg (mode0, op0);
14177 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14178 op1 = copy_to_mode_reg (mode1, op1);
14180 pat = GEN_FCN (icode) (target, op0, op1);
14181 if (! pat)
14182 return 0;
14183 emit_insn (pat);
14185 return target;
14188 static rtx
14189 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14191 rtx pat, scratch;
14192 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14193 tree arg0 = CALL_EXPR_ARG (exp, 1);
14194 tree arg1 = CALL_EXPR_ARG (exp, 2);
14195 rtx op0 = expand_normal (arg0);
14196 rtx op1 = expand_normal (arg1);
14197 machine_mode tmode = SImode;
14198 machine_mode mode0 = insn_data[icode].operand[1].mode;
14199 machine_mode mode1 = insn_data[icode].operand[2].mode;
14200 int cr6_form_int;
14202 if (TREE_CODE (cr6_form) != INTEGER_CST)
14204 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14205 return const0_rtx;
14207 else
14208 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14210 gcc_assert (mode0 == mode1);
14212 /* If we have invalid arguments, bail out before generating bad rtl. */
14213 if (arg0 == error_mark_node || arg1 == error_mark_node)
14214 return const0_rtx;
14216 if (target == 0
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14221 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14222 op0 = copy_to_mode_reg (mode0, op0);
14223 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14224 op1 = copy_to_mode_reg (mode1, op1);
14226 /* Note that for many of the relevant operations (e.g. cmpne or
14227 cmpeq) with float or double operands, it makes more sense for the
14228 mode of the allocated scratch register to select a vector of
14229 integers.  But the choice to copy the mode of operand 0 was made
14230 long ago and there are no plans to change it. */
14231 scratch = gen_reg_rtx (mode0);
14233 pat = GEN_FCN (icode) (scratch, op0, op1);
14234 if (! pat)
14235 return 0;
14236 emit_insn (pat);
14238 /* The vec_any* and vec_all* predicates use the same opcodes for two
14239 different operations, but the bits in CR6 will be different
14240 depending on what information we want. So we have to play tricks
14241 with CR6 to get the right bits out.
14243 If you think this is disgusting, look at the specs for the
14244 AltiVec predicates. */
14246 switch (cr6_form_int)
14248 case 0:
14249 emit_insn (gen_cr6_test_for_zero (target));
14250 break;
14251 case 1:
14252 emit_insn (gen_cr6_test_for_zero_reverse (target));
14253 break;
14254 case 2:
14255 emit_insn (gen_cr6_test_for_lt (target));
14256 break;
14257 case 3:
14258 emit_insn (gen_cr6_test_for_lt_reverse (target));
14259 break;
14260 default:
14261 error ("argument 1 of __builtin_altivec_predicate is out of range");
14262 break;
14265 return target;
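/* Illustrative sketch, using the overloaded names from altivec.h:
   one compare opcode serves several predicates by reading different
   CR6 bits, e.g.

     vec_all_eq (a, b)   ->  vcmpequw. then the "all true" CR6 test
     vec_any_ne (a, b)   ->  vcmpequw. then the reverse of that test

   which is why the expander dispatches on the cr6_form constant
   rather than on the comparison instruction itself.  */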
14268 static rtx
14269 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14271 rtx pat, addr;
14272 tree arg0 = CALL_EXPR_ARG (exp, 0);
14273 tree arg1 = CALL_EXPR_ARG (exp, 1);
14274 machine_mode tmode = insn_data[icode].operand[0].mode;
14275 machine_mode mode0 = Pmode;
14276 machine_mode mode1 = Pmode;
14277 rtx op0 = expand_normal (arg0);
14278 rtx op1 = expand_normal (arg1);
14280 if (icode == CODE_FOR_nothing)
14281 /* Builtin not supported on this processor. */
14282 return 0;
14284 /* If we got invalid arguments bail out before generating bad rtl. */
14285 if (arg0 == error_mark_node || arg1 == error_mark_node)
14286 return const0_rtx;
14288 if (target == 0
14289 || GET_MODE (target) != tmode
14290 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14291 target = gen_reg_rtx (tmode);
14293 op1 = copy_to_mode_reg (mode1, op1);
14295 if (op0 == const0_rtx)
14297 addr = gen_rtx_MEM (tmode, op1);
14299 else
14301 op0 = copy_to_mode_reg (mode0, op0);
14302 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14305 pat = GEN_FCN (icode) (target, addr);
14307 if (! pat)
14308 return 0;
14309 emit_insn (pat);
14311 return target;
14314 /* Return a constant vector for use as a little-endian permute control vector
14315 to reverse the order of elements of the given vector mode. */
14316 static rtx
14317 swap_selector_for_mode (machine_mode mode)
14319 /* These are little endian vectors, so their elements are reversed
14320 from what you would normally expect for a permute control vector. */
14321 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14322 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14323 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14324 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14325 unsigned int *swaparray, i;
14326 rtx perm[16];
14328 switch (mode)
14330 case V2DFmode:
14331 case V2DImode:
14332 swaparray = swap2;
14333 break;
14334 case V4SFmode:
14335 case V4SImode:
14336 swaparray = swap4;
14337 break;
14338 case V8HImode:
14339 swaparray = swap8;
14340 break;
14341 case V16QImode:
14342 swaparray = swap16;
14343 break;
14344 default:
14345 gcc_unreachable ();
14348 for (i = 0; i < 16; ++i)
14349 perm[i] = GEN_INT (swaparray[i]);
14351 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
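/* Illustrative note: the arrays above read as byte swaps, but that is
   exactly the reversal the comment warns about: once the CONST_VECTOR
   itself is stored in little-endian element order, a vperm that uses
   it with both inputs equal reverses whole elements, so a V4SI
   {e0,e1,e2,e3} comes back as {e3,e2,e1,e0}.  */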
14354 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14355 with -maltivec=be specified. Issue the load followed by an element-
14356 reversing permute. */
14357 void
14358 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14360 rtx tmp = gen_reg_rtx (mode);
14361 rtx load = gen_rtx_SET (tmp, op1);
14362 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14363 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14364 rtx sel = swap_selector_for_mode (mode);
14365 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14367 gcc_assert (REG_P (op0));
14368 emit_insn (par);
14369 emit_insn (gen_rtx_SET (op0, vperm));
14372 /* Generate code for a "stvxl" built-in for a little endian target with
14373 -maltivec=be specified. Issue the store preceded by an element-reversing
14374 permute. */
14375 void
14376 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14378 rtx tmp = gen_reg_rtx (mode);
14379 rtx store = gen_rtx_SET (op0, tmp);
14380 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14381 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14382 rtx sel = swap_selector_for_mode (mode);
14383 rtx vperm;
14385 gcc_assert (REG_P (op1));
14386 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14387 emit_insn (gen_rtx_SET (tmp, vperm));
14388 emit_insn (par);
14391 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14392 specified. Issue the store preceded by an element-reversing permute. */
14393 void
14394 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14396 machine_mode inner_mode = GET_MODE_INNER (mode);
14397 rtx tmp = gen_reg_rtx (mode);
14398 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14399 rtx sel = swap_selector_for_mode (mode);
14400 rtx vperm;
14402 gcc_assert (REG_P (op1));
14403 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14404 emit_insn (gen_rtx_SET (tmp, vperm));
14405 emit_insn (gen_rtx_SET (op0, stvx));
14408 static rtx
14409 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14411 rtx pat, addr;
14412 tree arg0 = CALL_EXPR_ARG (exp, 0);
14413 tree arg1 = CALL_EXPR_ARG (exp, 1);
14414 machine_mode tmode = insn_data[icode].operand[0].mode;
14415 machine_mode mode0 = Pmode;
14416 machine_mode mode1 = Pmode;
14417 rtx op0 = expand_normal (arg0);
14418 rtx op1 = expand_normal (arg1);
14420 if (icode == CODE_FOR_nothing)
14421 /* Builtin not supported on this processor. */
14422 return 0;
14424 /* If we got invalid arguments bail out before generating bad rtl. */
14425 if (arg0 == error_mark_node || arg1 == error_mark_node)
14426 return const0_rtx;
14428 if (target == 0
14429 || GET_MODE (target) != tmode
14430 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14431 target = gen_reg_rtx (tmode);
14433 op1 = copy_to_mode_reg (mode1, op1);
14435 /* For LVX, express the RTL accurately by ANDing the address with -16.
14436 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14437 so the raw address is fine. */
14438 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14439 || icode == CODE_FOR_altivec_lvx_v2di_2op
14440 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14441 || icode == CODE_FOR_altivec_lvx_v4si_2op
14442 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14443 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14445 rtx rawaddr;
14446 if (op0 == const0_rtx)
14447 rawaddr = op1;
14448 else
14450 op0 = copy_to_mode_reg (mode0, op0);
14451 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14453 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14454 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14456 /* For -maltivec=be, emit the load and follow it up with a
14457 permute to swap the elements. */
14458 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14460 rtx temp = gen_reg_rtx (tmode);
14461 emit_insn (gen_rtx_SET (temp, addr));
14463 rtx sel = swap_selector_for_mode (tmode);
14464 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14465 UNSPEC_VPERM);
14466 emit_insn (gen_rtx_SET (target, vperm));
14468 else
14469 emit_insn (gen_rtx_SET (target, addr));
14471 else
14473 if (op0 == const0_rtx)
14474 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14475 else
14477 op0 = copy_to_mode_reg (mode0, op0);
14478 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14479 gen_rtx_PLUS (Pmode, op1, op0));
14482 pat = GEN_FCN (icode) (target, addr);
14483 if (! pat)
14484 return 0;
14485 emit_insn (pat);
14488 return target;
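/* Editorial note (illustrative): the AND with -16 above encodes lvx's
   implicit truncation of the effective address to a 16-byte boundary
   directly in the RTL; e.g. on a 64-bit target the memory operand has
   the shape

       (mem:V4SI (and:DI (plus:DI (reg rB) (reg rA)) (const_int -16)))

   which lets alias analysis see exactly which bytes are loaded.  The
   LVXL/LVE*X paths keep the raw address because their UNSPECs already
   hide the special semantics.  */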
14491 static rtx
14492 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14494 tree arg0 = CALL_EXPR_ARG (exp, 0);
14495 tree arg1 = CALL_EXPR_ARG (exp, 1);
14496 tree arg2 = CALL_EXPR_ARG (exp, 2);
14497 rtx op0 = expand_normal (arg0);
14498 rtx op1 = expand_normal (arg1);
14499 rtx op2 = expand_normal (arg2);
14500 rtx pat;
14501 machine_mode mode0 = insn_data[icode].operand[0].mode;
14502 machine_mode mode1 = insn_data[icode].operand[1].mode;
14503 machine_mode mode2 = insn_data[icode].operand[2].mode;
14505 /* Invalid arguments. Bail before doing anything stoopid! */
14506 if (arg0 == error_mark_node
14507 || arg1 == error_mark_node
14508 || arg2 == error_mark_node)
14509 return const0_rtx;
14511 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14512 op0 = copy_to_mode_reg (mode2, op0);
14513 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14514 op1 = copy_to_mode_reg (mode0, op1);
14515 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14516 op2 = copy_to_mode_reg (mode1, op2);
14518 pat = GEN_FCN (icode) (op1, op2, op0);
14519 if (pat)
14520 emit_insn (pat);
14521 return NULL_RTX;
14524 static rtx
14525 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14527 tree arg0 = CALL_EXPR_ARG (exp, 0);
14528 tree arg1 = CALL_EXPR_ARG (exp, 1);
14529 tree arg2 = CALL_EXPR_ARG (exp, 2);
14530 rtx op0 = expand_normal (arg0);
14531 rtx op1 = expand_normal (arg1);
14532 rtx op2 = expand_normal (arg2);
14533 rtx pat, addr;
14534 machine_mode tmode = insn_data[icode].operand[0].mode;
14535 machine_mode mode1 = Pmode;
14536 machine_mode mode2 = Pmode;
14538 /* Invalid arguments. Bail before doing anything stoopid! */
14539 if (arg0 == error_mark_node
14540 || arg1 == error_mark_node
14541 || arg2 == error_mark_node)
14542 return const0_rtx;
14544 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14545 op0 = copy_to_mode_reg (tmode, op0);
14547 op2 = copy_to_mode_reg (mode2, op2);
14549 if (op1 == const0_rtx)
14551 addr = gen_rtx_MEM (tmode, op2);
14553 else
14555 op1 = copy_to_mode_reg (mode1, op1);
14556 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14559 pat = GEN_FCN (icode) (addr, op0);
14560 if (pat)
14561 emit_insn (pat);
14562 return NULL_RTX;
14565 static rtx
14566 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14568 rtx pat;
14569 tree arg0 = CALL_EXPR_ARG (exp, 0);
14570 tree arg1 = CALL_EXPR_ARG (exp, 1);
14571 tree arg2 = CALL_EXPR_ARG (exp, 2);
14572 rtx op0 = expand_normal (arg0);
14573 rtx op1 = expand_normal (arg1);
14574 rtx op2 = expand_normal (arg2);
14575 machine_mode mode0 = insn_data[icode].operand[0].mode;
14576 machine_mode mode1 = insn_data[icode].operand[1].mode;
14577 machine_mode mode2 = insn_data[icode].operand[2].mode;
14579 if (icode == CODE_FOR_nothing)
14580 /* Builtin not supported on this processor. */
14581 return NULL_RTX;
14583 /* If we got invalid arguments bail out before generating bad rtl. */
14584 if (arg0 == error_mark_node
14585 || arg1 == error_mark_node
14586 || arg2 == error_mark_node)
14587 return NULL_RTX;
14589 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14590 op0 = copy_to_mode_reg (mode0, op0);
14591 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14592 op1 = copy_to_mode_reg (mode1, op1);
14593 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14594 op2 = copy_to_mode_reg (mode2, op2);
14596 pat = GEN_FCN (icode) (op0, op1, op2);
14597 if (pat)
14598 emit_insn (pat);
14600 return NULL_RTX;
14603 static rtx
14604 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14606 tree arg0 = CALL_EXPR_ARG (exp, 0);
14607 tree arg1 = CALL_EXPR_ARG (exp, 1);
14608 tree arg2 = CALL_EXPR_ARG (exp, 2);
14609 rtx op0 = expand_normal (arg0);
14610 rtx op1 = expand_normal (arg1);
14611 rtx op2 = expand_normal (arg2);
14612 rtx pat, addr, rawaddr;
14613 machine_mode tmode = insn_data[icode].operand[0].mode;
14614 machine_mode smode = insn_data[icode].operand[1].mode;
14615 machine_mode mode1 = Pmode;
14616 machine_mode mode2 = Pmode;
14618 /* Invalid arguments. Bail before doing anything stoopid! */
14619 if (arg0 == error_mark_node
14620 || arg1 == error_mark_node
14621 || arg2 == error_mark_node)
14622 return const0_rtx;
14624 op2 = copy_to_mode_reg (mode2, op2);
14626 /* For STVX, express the RTL accurately by ANDing the address with -16.
14627 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14628 so the raw address is fine. */
14629 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14630 || icode == CODE_FOR_altivec_stvx_v2di_2op
14631 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14632 || icode == CODE_FOR_altivec_stvx_v4si_2op
14633 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14634 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14636 if (op1 == const0_rtx)
14637 rawaddr = op2;
14638 else
14640 op1 = copy_to_mode_reg (mode1, op1);
14641 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14644 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14645 addr = gen_rtx_MEM (tmode, addr);
14647 op0 = copy_to_mode_reg (tmode, op0);
14649 /* For -maltivec=be, emit a permute to swap the elements, followed
14650 by the store. */
14651 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14653 rtx temp = gen_reg_rtx (tmode);
14654 rtx sel = swap_selector_for_mode (tmode);
14655 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14656 UNSPEC_VPERM);
14657 emit_insn (gen_rtx_SET (temp, vperm));
14658 emit_insn (gen_rtx_SET (addr, temp));
14660 else
14661 emit_insn (gen_rtx_SET (addr, op0));
14663 else
14665 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14666 op0 = copy_to_mode_reg (smode, op0);
14668 if (op1 == const0_rtx)
14669 addr = gen_rtx_MEM (tmode, op2);
14670 else
14672 op1 = copy_to_mode_reg (mode1, op1);
14673 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14676 pat = GEN_FCN (icode) (addr, op0);
14677 if (pat)
14678 emit_insn (pat);
14681 return NULL_RTX;
14684 /* Return the appropriate SPR number associated with the given builtin. */
14685 static inline HOST_WIDE_INT
14686 htm_spr_num (enum rs6000_builtins code)
14688 if (code == HTM_BUILTIN_GET_TFHAR
14689 || code == HTM_BUILTIN_SET_TFHAR)
14690 return TFHAR_SPR;
14691 else if (code == HTM_BUILTIN_GET_TFIAR
14692 || code == HTM_BUILTIN_SET_TFIAR)
14693 return TFIAR_SPR;
14694 else if (code == HTM_BUILTIN_GET_TEXASR
14695 || code == HTM_BUILTIN_SET_TEXASR)
14696 return TEXASR_SPR;
14697 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14698 || code == HTM_BUILTIN_SET_TEXASRU);
14699 return TEXASRU_SPR;
14702 /* Return the appropriate SPR regno associated with the given builtin. */
14703 static inline HOST_WIDE_INT
14704 htm_spr_regno (enum rs6000_builtins code)
14706 if (code == HTM_BUILTIN_GET_TFHAR
14707 || code == HTM_BUILTIN_SET_TFHAR)
14708 return TFHAR_REGNO;
14709 else if (code == HTM_BUILTIN_GET_TFIAR
14710 || code == HTM_BUILTIN_SET_TFIAR)
14711 return TFIAR_REGNO;
14712 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14713 || code == HTM_BUILTIN_SET_TEXASR
14714 || code == HTM_BUILTIN_GET_TEXASRU
14715 || code == HTM_BUILTIN_SET_TEXASRU);
14716 return TEXASR_REGNO;
14719 /* Return the correct ICODE value depending on whether we are
14720 setting or reading the HTM SPRs. */
14721 static inline enum insn_code
14722 rs6000_htm_spr_icode (bool nonvoid)
14724 if (nonvoid)
14725 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14726 else
14727 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
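/* Illustrative note: "nonvoid" distinguishes the HTM SPR getters from
   the setters, so (assuming the documented builtin names) a call such
   as __builtin_get_texasr () selects the mfspr pattern, while
   __builtin_set_texasr (x) selects mtspr, each in DImode when
   TARGET_POWERPC64 and in SImode otherwise.  */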
14730 /* Expand the HTM builtin in EXP and store the result in TARGET.
14731 Store true in *EXPANDEDP if we found a builtin to expand. */
14732 static rtx
14733 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14735 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14736 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14737 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14738 const struct builtin_description *d;
14739 size_t i;
14741 *expandedp = true;
14743 if (!TARGET_POWERPC64
14744 && (fcode == HTM_BUILTIN_TABORTDC
14745 || fcode == HTM_BUILTIN_TABORTDCI))
14747 size_t uns_fcode = (size_t)fcode;
14748 const char *name = rs6000_builtin_info[uns_fcode].name;
14749 error ("builtin %s is only valid in 64-bit mode", name);
14750 return const0_rtx;
14753 /* Expand the HTM builtins. */
14754 d = bdesc_htm;
14755 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14756 if (d->code == fcode)
14758 rtx op[MAX_HTM_OPERANDS], pat;
14759 int nopnds = 0;
14760 tree arg;
14761 call_expr_arg_iterator iter;
14762 unsigned attr = rs6000_builtin_info[fcode].attr;
14763 enum insn_code icode = d->icode;
14764 const struct insn_operand_data *insn_op;
14765 bool uses_spr = (attr & RS6000_BTC_SPR);
14766 rtx cr = NULL_RTX;
14768 if (uses_spr)
14769 icode = rs6000_htm_spr_icode (nonvoid);
14770 insn_op = &insn_data[icode].operand[0];
14772 if (nonvoid)
14774 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14775 if (!target
14776 || GET_MODE (target) != tmode
14777 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14778 target = gen_reg_rtx (tmode);
14779 if (uses_spr)
14780 op[nopnds++] = target;
14783 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14785 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14786 return const0_rtx;
14788 insn_op = &insn_data[icode].operand[nopnds];
14790 op[nopnds] = expand_normal (arg);
14792 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14794 if (!strcmp (insn_op->constraint, "n"))
14796 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14797 if (!CONST_INT_P (op[nopnds]))
14798 error ("argument %d must be an unsigned literal", arg_num);
14799 else
14800 error ("argument %d is an unsigned literal that is "
14801 "out of range", arg_num);
14802 return const0_rtx;
14804 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14807 nopnds++;
14810 /* Handle the builtins for extended mnemonics. These accept
14811 no arguments, but map to builtins that take arguments. */
14812 switch (fcode)
14814 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14815 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14816 op[nopnds++] = GEN_INT (1);
14817 if (flag_checking)
14818 attr |= RS6000_BTC_UNARY;
14819 break;
14820 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14821 op[nopnds++] = GEN_INT (0);
14822 if (flag_checking)
14823 attr |= RS6000_BTC_UNARY;
14824 break;
14825 default:
14826 break;
14829 /* If this builtin accesses SPRs, then pass in the appropriate
14830 SPR number and SPR regno as the last two operands. */
14831 if (uses_spr)
14833 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14834 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14835 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14837 /* If this builtin accesses a CR, then pass in a scratch
14838 CR as the last operand. */
14839 else if (attr & RS6000_BTC_CR)
14840 { cr = gen_reg_rtx (CCmode);
14841 op[nopnds++] = cr;
14844 if (flag_checking)
14846 int expected_nopnds = 0;
14847 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14848 expected_nopnds = 1;
14849 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14850 expected_nopnds = 2;
14851 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14852 expected_nopnds = 3;
14853 if (!(attr & RS6000_BTC_VOID))
14854 expected_nopnds += 1;
14855 if (uses_spr)
14856 expected_nopnds += 2;
14858 gcc_assert (nopnds == expected_nopnds
14859 && nopnds <= MAX_HTM_OPERANDS);
14862 switch (nopnds)
14864 case 1:
14865 pat = GEN_FCN (icode) (op[0]);
14866 break;
14867 case 2:
14868 pat = GEN_FCN (icode) (op[0], op[1]);
14869 break;
14870 case 3:
14871 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14872 break;
14873 case 4:
14874 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14875 break;
14876 default:
14877 gcc_unreachable ();
14879 if (!pat)
14880 return NULL_RTX;
14881 emit_insn (pat);
14883 if (attr & RS6000_BTC_CR)
14885 if (fcode == HTM_BUILTIN_TBEGIN)
14887 /* Emit code to set TARGET to true or false depending on
14888 whether the tbegin. instruction succeeded or failed
14889 to start a transaction. We do this by placing the 1's
14890 complement of CR's EQ bit into TARGET. */
14891 rtx scratch = gen_reg_rtx (SImode);
14892 emit_insn (gen_rtx_SET (scratch,
14893 gen_rtx_EQ (SImode, cr,
14894 const0_rtx)));
14895 emit_insn (gen_rtx_SET (target,
14896 gen_rtx_XOR (SImode, scratch,
14897 GEN_INT (1))));
14899 else
14901 /* Emit code to copy the 4-bit condition register field
14902 CR into the least significant end of register TARGET. */
14903 rtx scratch1 = gen_reg_rtx (SImode);
14904 rtx scratch2 = gen_reg_rtx (SImode);
14905 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14906 emit_insn (gen_movcc (subreg, cr));
14907 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14908 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14912 if (nonvoid)
14913 return target;
14914 return const0_rtx;
14917 *expandedp = false;
14918 return NULL_RTX;
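/* Usage sketch (illustrative only): the CR handling above is what makes
   the usual HTM idiom work:

       if (__builtin_tbegin (0))
         {
           ... transactional code ...
           __builtin_tend (0);
         }

   tbegin. sets a CR field, and the 1's complement of its EQ bit is
   copied into the result, so a successfully started transaction yields
   a nonzero value.  */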
14921 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14923 static rtx
14924 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14925 rtx target)
14927 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14928 if (fcode == RS6000_BUILTIN_CPU_INIT)
14929 return const0_rtx;
14931 if (target == 0 || GET_MODE (target) != SImode)
14932 target = gen_reg_rtx (SImode);
14934 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14935 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14936 if (TREE_CODE (arg) != STRING_CST)
14938 error ("builtin %s only accepts a string argument",
14939 rs6000_builtin_info[(size_t) fcode].name);
14940 return const0_rtx;
14943 if (fcode == RS6000_BUILTIN_CPU_IS)
14945 const char *cpu = TREE_STRING_POINTER (arg);
14946 rtx cpuid = NULL_RTX;
14947 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14948 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14950 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14951 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14952 break;
14954 if (cpuid == NULL_RTX)
14956 /* Invalid CPU argument. */
14957 error ("cpu %s is an invalid argument to builtin %s",
14958 cpu, rs6000_builtin_info[(size_t) fcode].name);
14959 return const0_rtx;
14962 rtx platform = gen_reg_rtx (SImode);
14963 rtx tcbmem = gen_const_mem (SImode,
14964 gen_rtx_PLUS (Pmode,
14965 gen_rtx_REG (Pmode, TLS_REGNUM),
14966 GEN_INT (TCB_PLATFORM_OFFSET)));
14967 emit_move_insn (platform, tcbmem);
14968 emit_insn (gen_eqsi3 (target, platform, cpuid));
14970 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14972 const char *hwcap = TREE_STRING_POINTER (arg);
14973 rtx mask = NULL_RTX;
14974 int hwcap_offset;
14975 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14976 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14978 mask = GEN_INT (cpu_supports_info[i].mask);
14979 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14980 break;
14982 if (mask == NULL_RTX)
14984 /* Invalid HWCAP argument. */
14985 error ("hwcap %s is an invalid argument to builtin %s",
14986 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14987 return const0_rtx;
14990 rtx tcb_hwcap = gen_reg_rtx (SImode);
14991 rtx tcbmem = gen_const_mem (SImode,
14992 gen_rtx_PLUS (Pmode,
14993 gen_rtx_REG (Pmode, TLS_REGNUM),
14994 GEN_INT (hwcap_offset)));
14995 emit_move_insn (tcb_hwcap, tcbmem);
14996 rtx scratch1 = gen_reg_rtx (SImode);
14997 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14998 rtx scratch2 = gen_reg_rtx (SImode);
14999 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15000 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15003 /* Record that we have expanded a CPU builtin, so that we can later
15004 emit a reference to the special symbol exported by LIBC to ensure we
15005 do not link against an old LIBC that doesn't support this feature. */
15006 cpu_builtin_p = true;
15008 #else
15009 /* For old LIBCs, always return FALSE. */
15010 emit_move_insn (target, GEN_INT (0));
15011 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15013 return target;
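/* Usage sketch (illustrative only): with a libc that exports the HWCAP
   words in the TCB, the expansions above turn runtime checks such as

       if (__builtin_cpu_is ("power8"))
         ...
       else if (__builtin_cpu_supports ("vsx"))
         ...

   into a single TCB load plus a compare (or mask-and-compare), with no
   library call at run time.  */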
15016 static rtx
15017 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15019 rtx pat;
15020 tree arg0 = CALL_EXPR_ARG (exp, 0);
15021 tree arg1 = CALL_EXPR_ARG (exp, 1);
15022 tree arg2 = CALL_EXPR_ARG (exp, 2);
15023 rtx op0 = expand_normal (arg0);
15024 rtx op1 = expand_normal (arg1);
15025 rtx op2 = expand_normal (arg2);
15026 machine_mode tmode = insn_data[icode].operand[0].mode;
15027 machine_mode mode0 = insn_data[icode].operand[1].mode;
15028 machine_mode mode1 = insn_data[icode].operand[2].mode;
15029 machine_mode mode2 = insn_data[icode].operand[3].mode;
15031 if (icode == CODE_FOR_nothing)
15032 /* Builtin not supported on this processor. */
15033 return 0;
15035 /* If we got invalid arguments bail out before generating bad rtl. */
15036 if (arg0 == error_mark_node
15037 || arg1 == error_mark_node
15038 || arg2 == error_mark_node)
15039 return const0_rtx;
15041 /* Check and prepare argument depending on the instruction code.
15043 Note that a switch statement instead of the sequence of tests
15044 would be incorrect as many of the CODE_FOR values could be
15045 CODE_FOR_nothing and that would yield multiple alternatives
15046 with identical values. We'd never reach here at runtime in
15047 this case. */
15048 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15049 || icode == CODE_FOR_altivec_vsldoi_v4si
15050 || icode == CODE_FOR_altivec_vsldoi_v8hi
15051 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15053 /* Only allow 4-bit unsigned literals. */
15054 STRIP_NOPS (arg2);
15055 if (TREE_CODE (arg2) != INTEGER_CST
15056 || TREE_INT_CST_LOW (arg2) & ~0xf)
15058 error ("argument 3 must be a 4-bit unsigned literal");
15059 return const0_rtx;
15062 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15063 || icode == CODE_FOR_vsx_xxpermdi_v2di
15064 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15065 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15066 || icode == CODE_FOR_vsx_xxsldwi_v4si
15067 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15068 || icode == CODE_FOR_vsx_xxsldwi_v2di
15069 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15071 /* Only allow 2-bit unsigned literals. */
15072 STRIP_NOPS (arg2);
15073 if (TREE_CODE (arg2) != INTEGER_CST
15074 || TREE_INT_CST_LOW (arg2) & ~0x3)
15076 error ("argument 3 must be a 2-bit unsigned literal");
15077 return const0_rtx;
15080 else if (icode == CODE_FOR_vsx_set_v2df
15081 || icode == CODE_FOR_vsx_set_v2di
15082 || icode == CODE_FOR_bcdadd
15083 || icode == CODE_FOR_bcdadd_lt
15084 || icode == CODE_FOR_bcdadd_eq
15085 || icode == CODE_FOR_bcdadd_gt
15086 || icode == CODE_FOR_bcdsub
15087 || icode == CODE_FOR_bcdsub_lt
15088 || icode == CODE_FOR_bcdsub_eq
15089 || icode == CODE_FOR_bcdsub_gt)
15091 /* Only allow 1-bit unsigned literals. */
15092 STRIP_NOPS (arg2);
15093 if (TREE_CODE (arg2) != INTEGER_CST
15094 || TREE_INT_CST_LOW (arg2) & ~0x1)
15096 error ("argument 3 must be a 1-bit unsigned literal");
15097 return const0_rtx;
15100 else if (icode == CODE_FOR_dfp_ddedpd_dd
15101 || icode == CODE_FOR_dfp_ddedpd_td)
15103 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15104 STRIP_NOPS (arg0);
15105 if (TREE_CODE (arg0) != INTEGER_CST
15106 || (TREE_INT_CST_LOW (arg0) != 0 && TREE_INT_CST_LOW (arg0) != 2))
15108 error ("argument 1 must be 0 or 2");
15109 return const0_rtx;
15112 else if (icode == CODE_FOR_dfp_denbcd_dd
15113 || icode == CODE_FOR_dfp_denbcd_td)
15115 /* Only allow 1-bit unsigned literals. */
15116 STRIP_NOPS (arg0);
15117 if (TREE_CODE (arg0) != INTEGER_CST
15118 || TREE_INT_CST_LOW (arg0) & ~0x1)
15120 error ("argument 1 must be a 1-bit unsigned literal");
15121 return const0_rtx;
15124 else if (icode == CODE_FOR_dfp_dscli_dd
15125 || icode == CODE_FOR_dfp_dscli_td
15126 || icode == CODE_FOR_dfp_dscri_dd
15127 || icode == CODE_FOR_dfp_dscri_td)
15129 /* Only allow 6-bit unsigned literals. */
15130 STRIP_NOPS (arg1);
15131 if (TREE_CODE (arg1) != INTEGER_CST
15132 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15134 error ("argument 2 must be a 6-bit unsigned literal");
15135 return const0_rtx;
15138 else if (icode == CODE_FOR_crypto_vshasigmaw
15139 || icode == CODE_FOR_crypto_vshasigmad)
15141 /* Check whether the 2nd and 3rd arguments are integer constants and in
15142 range and prepare arguments. */
15143 STRIP_NOPS (arg1);
15144 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15146 error ("argument 2 must be 0 or 1");
15147 return const0_rtx;
15150 STRIP_NOPS (arg2);
15151 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15153 error ("argument 3 must be in the range 0..15");
15154 return const0_rtx;
15158 if (target == 0
15159 || GET_MODE (target) != tmode
15160 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15161 target = gen_reg_rtx (tmode);
15163 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15164 op0 = copy_to_mode_reg (mode0, op0);
15165 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15166 op1 = copy_to_mode_reg (mode1, op1);
15167 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15168 op2 = copy_to_mode_reg (mode2, op2);
15170 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15171 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15172 else
15173 pat = GEN_FCN (icode) (target, op0, op1, op2);
15174 if (! pat)
15175 return 0;
15176 emit_insn (pat);
15178 return target;
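/* Illustrative example: the literal checks above are what reject, say,

       vec_sld (a, b, 17);   -- error: argument 3 must be a 4-bit unsigned literal

   at expansion time, assuming vec_sld resolves to one of the
   CODE_FOR_altivec_vsldoi_* entries; the immediate is encoded directly
   in the instruction, so it must be a compile-time constant in range.  */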
15181 /* Expand the lvx builtins. */
15182 static rtx
15183 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15185 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15186 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15187 tree arg0;
15188 machine_mode tmode, mode0;
15189 rtx pat, op0;
15190 enum insn_code icode;
15192 switch (fcode)
15194 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15195 icode = CODE_FOR_vector_altivec_load_v16qi;
15196 break;
15197 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15198 icode = CODE_FOR_vector_altivec_load_v8hi;
15199 break;
15200 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15201 icode = CODE_FOR_vector_altivec_load_v4si;
15202 break;
15203 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15204 icode = CODE_FOR_vector_altivec_load_v4sf;
15205 break;
15206 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15207 icode = CODE_FOR_vector_altivec_load_v2df;
15208 break;
15209 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15210 icode = CODE_FOR_vector_altivec_load_v2di;
15211 break;
15212 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15213 icode = CODE_FOR_vector_altivec_load_v1ti;
15214 break;
15215 default:
15216 *expandedp = false;
15217 return NULL_RTX;
15220 *expandedp = true;
15222 arg0 = CALL_EXPR_ARG (exp, 0);
15223 op0 = expand_normal (arg0);
15224 tmode = insn_data[icode].operand[0].mode;
15225 mode0 = insn_data[icode].operand[1].mode;
15227 if (target == 0
15228 || GET_MODE (target) != tmode
15229 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15230 target = gen_reg_rtx (tmode);
15232 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15235 pat = GEN_FCN (icode) (target, op0);
15236 if (! pat)
15237 return 0;
15238 emit_insn (pat);
15239 return target;
15242 /* Expand the stvx builtins. */
15243 static rtx
15244 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15245 bool *expandedp)
15247 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15248 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15249 tree arg0, arg1;
15250 machine_mode mode0, mode1;
15251 rtx pat, op0, op1;
15252 enum insn_code icode;
15254 switch (fcode)
15256 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15257 icode = CODE_FOR_vector_altivec_store_v16qi;
15258 break;
15259 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15260 icode = CODE_FOR_vector_altivec_store_v8hi;
15261 break;
15262 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15263 icode = CODE_FOR_vector_altivec_store_v4si;
15264 break;
15265 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15266 icode = CODE_FOR_vector_altivec_store_v4sf;
15267 break;
15268 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15269 icode = CODE_FOR_vector_altivec_store_v2df;
15270 break;
15271 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15272 icode = CODE_FOR_vector_altivec_store_v2di;
15273 break;
15274 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15275 icode = CODE_FOR_vector_altivec_store_v1ti;
15276 break;
15277 default:
15278 *expandedp = false;
15279 return NULL_RTX;
15282 arg0 = CALL_EXPR_ARG (exp, 0);
15283 arg1 = CALL_EXPR_ARG (exp, 1);
15284 op0 = expand_normal (arg0);
15285 op1 = expand_normal (arg1);
15286 mode0 = insn_data[icode].operand[0].mode;
15287 mode1 = insn_data[icode].operand[1].mode;
15289 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15290 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15291 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15292 op1 = copy_to_mode_reg (mode1, op1);
15294 pat = GEN_FCN (icode) (op0, op1);
15295 if (pat)
15296 emit_insn (pat);
15298 *expandedp = true;
15299 return NULL_RTX;
15302 /* Expand the dst builtins. */
15303 static rtx
15304 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15305 bool *expandedp)
15307 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15308 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15309 tree arg0, arg1, arg2;
15310 machine_mode mode0, mode1;
15311 rtx pat, op0, op1, op2;
15312 const struct builtin_description *d;
15313 size_t i;
15315 *expandedp = false;
15317 /* Handle DST variants. */
15318 d = bdesc_dst;
15319 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15320 if (d->code == fcode)
15322 arg0 = CALL_EXPR_ARG (exp, 0);
15323 arg1 = CALL_EXPR_ARG (exp, 1);
15324 arg2 = CALL_EXPR_ARG (exp, 2);
15325 op0 = expand_normal (arg0);
15326 op1 = expand_normal (arg1);
15327 op2 = expand_normal (arg2);
15328 mode0 = insn_data[d->icode].operand[0].mode;
15329 mode1 = insn_data[d->icode].operand[1].mode;
15331 /* Invalid arguments, bail out before generating bad rtl. */
15332 if (arg0 == error_mark_node
15333 || arg1 == error_mark_node
15334 || arg2 == error_mark_node)
15335 return const0_rtx;
15337 *expandedp = true;
15338 STRIP_NOPS (arg2);
15339 if (TREE_CODE (arg2) != INTEGER_CST
15340 || TREE_INT_CST_LOW (arg2) & ~0x3)
15342 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15343 return const0_rtx;
15346 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15347 op0 = copy_to_mode_reg (Pmode, op0);
15348 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15349 op1 = copy_to_mode_reg (mode1, op1);
15351 pat = GEN_FCN (d->icode) (op0, op1, op2);
15352 if (pat != 0)
15353 emit_insn (pat);
15355 return NULL_RTX;
15358 return NULL_RTX;
15361 /* Expand vec_init builtin. */
15362 static rtx
15363 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15365 machine_mode tmode = TYPE_MODE (type);
15366 machine_mode inner_mode = GET_MODE_INNER (tmode);
15367 int i, n_elt = GET_MODE_NUNITS (tmode);
15369 gcc_assert (VECTOR_MODE_P (tmode));
15370 gcc_assert (n_elt == call_expr_nargs (exp));
15372 if (!target || !register_operand (target, tmode))
15373 target = gen_reg_rtx (tmode);
15375 /* If we have a vector comprised of a single element, such as V1TImode, do
15376 the initialization directly. */
15377 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15379 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15380 emit_move_insn (target, gen_lowpart (tmode, x));
15382 else
15384 rtvec v = rtvec_alloc (n_elt);
15386 for (i = 0; i < n_elt; ++i)
15388 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15389 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15392 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15395 return target;
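/* Illustrative sketch: a vector construction with non-constant
   elements, such as

       vector int v = (vector int) { a, b, c, d };

   can be routed through a VEC_INIT builtin and lands here; each element
   is expanded, wrapped in a PARALLEL, and handed to
   rs6000_expand_vector_init, except for the single-element V1TImode
   case handled directly above.  */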
15398 /* Return the integer constant in ARG. Constrain it to be in the range
15399 of the subparts of VEC_TYPE; issue an error if not. */
15401 static int
15402 get_element_number (tree vec_type, tree arg)
15404 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15406 if (!tree_fits_uhwi_p (arg)
15407 || (elt = tree_to_uhwi (arg), elt > max))
15409 error ("selector must be an integer constant in the range 0..%wi", max);
15410 return 0;
15413 return elt;
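/* Worked example (illustrative): for a V4SI vector type,
   TYPE_VECTOR_SUBPARTS is 4, so max is 3, and a constant selector of 4
   or more is diagnosed with "selector must be an integer constant in
   the range 0..3", with 0 returned as a safe fallback.  */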
15416 /* Expand vec_set builtin. */
15417 static rtx
15418 altivec_expand_vec_set_builtin (tree exp)
15420 machine_mode tmode, mode1;
15421 tree arg0, arg1, arg2;
15422 int elt;
15423 rtx op0, op1;
15425 arg0 = CALL_EXPR_ARG (exp, 0);
15426 arg1 = CALL_EXPR_ARG (exp, 1);
15427 arg2 = CALL_EXPR_ARG (exp, 2);
15429 tmode = TYPE_MODE (TREE_TYPE (arg0));
15430 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15431 gcc_assert (VECTOR_MODE_P (tmode));
15433 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15434 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15435 elt = get_element_number (TREE_TYPE (arg0), arg2);
15437 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15438 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15440 op0 = force_reg (tmode, op0);
15441 op1 = force_reg (mode1, op1);
15443 rs6000_expand_vector_set (op0, op1, elt);
15445 return op0;
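/* Illustrative usage: a call like vec_insert (x, v, 2) funnels through
   here (assuming it resolves to a VEC_SET builtin): the scalar is
   converted to the element mode if needed, both operands are forced
   into registers, and rs6000_expand_vector_set performs the element
   store.  */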
15448 /* Expand vec_ext builtin. */
15449 static rtx
15450 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15452 machine_mode tmode, mode0;
15453 tree arg0, arg1;
15454 rtx op0;
15455 rtx op1;
15457 arg0 = CALL_EXPR_ARG (exp, 0);
15458 arg1 = CALL_EXPR_ARG (exp, 1);
15460 op0 = expand_normal (arg0);
15461 op1 = expand_normal (arg1);
15463 /* Call get_element_number to validate arg1 if it is a constant. */
15464 if (TREE_CODE (arg1) == INTEGER_CST)
15465 (void) get_element_number (TREE_TYPE (arg0), arg1);
15467 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15468 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15469 gcc_assert (VECTOR_MODE_P (mode0));
15471 op0 = force_reg (mode0, op0);
15473 if (optimize || !target || !register_operand (target, tmode))
15474 target = gen_reg_rtx (tmode);
15476 rs6000_expand_vector_extract (target, op0, op1);
15478 return target;
15481 /* Expand the builtin in EXP and store the result in TARGET. Store
15482 true in *EXPANDEDP if we found a builtin to expand. */
15483 static rtx
15484 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15486 const struct builtin_description *d;
15487 size_t i;
15488 enum insn_code icode;
15489 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15490 tree arg0;
15491 rtx op0, pat;
15492 machine_mode tmode, mode0;
15493 enum rs6000_builtins fcode
15494 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15496 if (rs6000_overloaded_builtin_p (fcode))
15498 *expandedp = true;
15499 error ("unresolved overload for Altivec builtin %qF", fndecl);
15501 /* Given it is invalid, just generate a normal call. */
15502 return expand_call (exp, target, false);
15505 target = altivec_expand_ld_builtin (exp, target, expandedp);
15506 if (*expandedp)
15507 return target;
15509 target = altivec_expand_st_builtin (exp, target, expandedp);
15510 if (*expandedp)
15511 return target;
15513 target = altivec_expand_dst_builtin (exp, target, expandedp);
15514 if (*expandedp)
15515 return target;
15517 *expandedp = true;
15519 switch (fcode)
15521 case ALTIVEC_BUILTIN_STVX_V2DF:
15522 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15523 case ALTIVEC_BUILTIN_STVX_V2DI:
15524 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15525 case ALTIVEC_BUILTIN_STVX_V4SF:
15526 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15527 case ALTIVEC_BUILTIN_STVX:
15528 case ALTIVEC_BUILTIN_STVX_V4SI:
15529 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15530 case ALTIVEC_BUILTIN_STVX_V8HI:
15531 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15532 case ALTIVEC_BUILTIN_STVX_V16QI:
15533 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15534 case ALTIVEC_BUILTIN_STVEBX:
15535 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15536 case ALTIVEC_BUILTIN_STVEHX:
15537 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15538 case ALTIVEC_BUILTIN_STVEWX:
15539 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15540 case ALTIVEC_BUILTIN_STVXL_V2DF:
15541 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15542 case ALTIVEC_BUILTIN_STVXL_V2DI:
15543 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15544 case ALTIVEC_BUILTIN_STVXL_V4SF:
15545 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15546 case ALTIVEC_BUILTIN_STVXL:
15547 case ALTIVEC_BUILTIN_STVXL_V4SI:
15548 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15549 case ALTIVEC_BUILTIN_STVXL_V8HI:
15550 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15551 case ALTIVEC_BUILTIN_STVXL_V16QI:
15552 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15554 case ALTIVEC_BUILTIN_STVLX:
15555 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15556 case ALTIVEC_BUILTIN_STVLXL:
15557 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15558 case ALTIVEC_BUILTIN_STVRX:
15559 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15560 case ALTIVEC_BUILTIN_STVRXL:
15561 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15563 case P9V_BUILTIN_STXVL:
15564 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15566 case VSX_BUILTIN_STXVD2X_V1TI:
15567 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15568 case VSX_BUILTIN_STXVD2X_V2DF:
15569 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15570 case VSX_BUILTIN_STXVD2X_V2DI:
15571 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15572 case VSX_BUILTIN_STXVW4X_V4SF:
15573 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15574 case VSX_BUILTIN_STXVW4X_V4SI:
15575 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15576 case VSX_BUILTIN_STXVW4X_V8HI:
15577 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15578 case VSX_BUILTIN_STXVW4X_V16QI:
15579 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15581 /* For the following on big endian, it's ok to use any appropriate
15582 unaligned-supporting store, so use a generic expander. For
15583 little-endian, the exact element-reversing instruction must
15584 be used. */
15585 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15587 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15588 : CODE_FOR_vsx_st_elemrev_v2df);
15589 return altivec_expand_stv_builtin (code, exp);
15591 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15593 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15594 : CODE_FOR_vsx_st_elemrev_v2di);
15595 return altivec_expand_stv_builtin (code, exp);
15597 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15599 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15600 : CODE_FOR_vsx_st_elemrev_v4sf);
15601 return altivec_expand_stv_builtin (code, exp);
15603 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15605 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15606 : CODE_FOR_vsx_st_elemrev_v4si);
15607 return altivec_expand_stv_builtin (code, exp);
15609 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15611 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15612 : CODE_FOR_vsx_st_elemrev_v8hi);
15613 return altivec_expand_stv_builtin (code, exp);
15615 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15617 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15618 : CODE_FOR_vsx_st_elemrev_v16qi);
15619 return altivec_expand_stv_builtin (code, exp);
15622 case ALTIVEC_BUILTIN_MFVSCR:
15623 icode = CODE_FOR_altivec_mfvscr;
15624 tmode = insn_data[icode].operand[0].mode;
15626 if (target == 0
15627 || GET_MODE (target) != tmode
15628 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15629 target = gen_reg_rtx (tmode);
15631 pat = GEN_FCN (icode) (target);
15632 if (! pat)
15633 return 0;
15634 emit_insn (pat);
15635 return target;
15637 case ALTIVEC_BUILTIN_MTVSCR:
15638 icode = CODE_FOR_altivec_mtvscr;
15639 arg0 = CALL_EXPR_ARG (exp, 0);
15640 op0 = expand_normal (arg0);
15641 mode0 = insn_data[icode].operand[0].mode;
15643 /* If we got invalid arguments bail out before generating bad rtl. */
15644 if (arg0 == error_mark_node)
15645 return const0_rtx;
15647 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15648 op0 = copy_to_mode_reg (mode0, op0);
15650 pat = GEN_FCN (icode) (op0);
15651 if (pat)
15652 emit_insn (pat);
15653 return NULL_RTX;
15655 case ALTIVEC_BUILTIN_DSSALL:
15656 emit_insn (gen_altivec_dssall ());
15657 return NULL_RTX;
15659 case ALTIVEC_BUILTIN_DSS:
15660 icode = CODE_FOR_altivec_dss;
15661 arg0 = CALL_EXPR_ARG (exp, 0);
15662 STRIP_NOPS (arg0);
15663 op0 = expand_normal (arg0);
15664 mode0 = insn_data[icode].operand[0].mode;
15666 /* If we got invalid arguments bail out before generating bad rtl. */
15667 if (arg0 == error_mark_node)
15668 return const0_rtx;
15670 if (TREE_CODE (arg0) != INTEGER_CST
15671 || TREE_INT_CST_LOW (arg0) & ~0x3)
15673 error ("argument to dss must be a 2-bit unsigned literal");
15674 return const0_rtx;
15677 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15678 op0 = copy_to_mode_reg (mode0, op0);
15680 emit_insn (gen_altivec_dss (op0));
15681 return NULL_RTX;
15683 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15684 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15685 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15686 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15687 case VSX_BUILTIN_VEC_INIT_V2DF:
15688 case VSX_BUILTIN_VEC_INIT_V2DI:
15689 case VSX_BUILTIN_VEC_INIT_V1TI:
15690 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15692 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15693 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15694 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15695 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15696 case VSX_BUILTIN_VEC_SET_V2DF:
15697 case VSX_BUILTIN_VEC_SET_V2DI:
15698 case VSX_BUILTIN_VEC_SET_V1TI:
15699 return altivec_expand_vec_set_builtin (exp);
15701 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15702 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15703 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15704 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15705 case VSX_BUILTIN_VEC_EXT_V2DF:
15706 case VSX_BUILTIN_VEC_EXT_V2DI:
15707 case VSX_BUILTIN_VEC_EXT_V1TI:
15708 return altivec_expand_vec_ext_builtin (exp, target);
15710 default:
15711 break;
15712 /* Fall through. */
15715 /* Expand abs* operations. */
15716 d = bdesc_abs;
15717 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15718 if (d->code == fcode)
15719 return altivec_expand_abs_builtin (d->icode, exp, target);
15721 /* Expand the AltiVec predicates. */
15722 d = bdesc_altivec_preds;
15723 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15724 if (d->code == fcode)
15725 return altivec_expand_predicate_builtin (d->icode, exp, target);
15727 /* LV* are funky. We initialized them differently. */
15728 switch (fcode)
15730 case ALTIVEC_BUILTIN_LVSL:
15731 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15732 exp, target, false);
15733 case ALTIVEC_BUILTIN_LVSR:
15734 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15735 exp, target, false);
15736 case ALTIVEC_BUILTIN_LVEBX:
15737 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15738 exp, target, false);
15739 case ALTIVEC_BUILTIN_LVEHX:
15740 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15741 exp, target, false);
15742 case ALTIVEC_BUILTIN_LVEWX:
15743 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15744 exp, target, false);
15745 case ALTIVEC_BUILTIN_LVXL_V2DF:
15746 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15747 exp, target, false);
15748 case ALTIVEC_BUILTIN_LVXL_V2DI:
15749 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15750 exp, target, false);
15751 case ALTIVEC_BUILTIN_LVXL_V4SF:
15752 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15753 exp, target, false);
15754 case ALTIVEC_BUILTIN_LVXL:
15755 case ALTIVEC_BUILTIN_LVXL_V4SI:
15756 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15757 exp, target, false);
15758 case ALTIVEC_BUILTIN_LVXL_V8HI:
15759 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15760 exp, target, false);
15761 case ALTIVEC_BUILTIN_LVXL_V16QI:
15762 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15763 exp, target, false);
15764 case ALTIVEC_BUILTIN_LVX_V2DF:
15765 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15766 exp, target, false);
15767 case ALTIVEC_BUILTIN_LVX_V2DI:
15768 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15769 exp, target, false);
15770 case ALTIVEC_BUILTIN_LVX_V4SF:
15771 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15772 exp, target, false);
15773 case ALTIVEC_BUILTIN_LVX:
15774 case ALTIVEC_BUILTIN_LVX_V4SI:
15775 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15776 exp, target, false);
15777 case ALTIVEC_BUILTIN_LVX_V8HI:
15778 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15779 exp, target, false);
15780 case ALTIVEC_BUILTIN_LVX_V16QI:
15781 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15782 exp, target, false);
15783 case ALTIVEC_BUILTIN_LVLX:
15784 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15785 exp, target, true);
15786 case ALTIVEC_BUILTIN_LVLXL:
15787 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15788 exp, target, true);
15789 case ALTIVEC_BUILTIN_LVRX:
15790 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15791 exp, target, true);
15792 case ALTIVEC_BUILTIN_LVRXL:
15793 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15794 exp, target, true);
15795 case VSX_BUILTIN_LXVD2X_V1TI:
15796 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15797 exp, target, false);
15798 case VSX_BUILTIN_LXVD2X_V2DF:
15799 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15800 exp, target, false);
15801 case VSX_BUILTIN_LXVD2X_V2DI:
15802 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15803 exp, target, false);
15804 case VSX_BUILTIN_LXVW4X_V4SF:
15805 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15806 exp, target, false);
15807 case VSX_BUILTIN_LXVW4X_V4SI:
15808 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15809 exp, target, false);
15810 case VSX_BUILTIN_LXVW4X_V8HI:
15811 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15812 exp, target, false);
15813 case VSX_BUILTIN_LXVW4X_V16QI:
15814 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15815 exp, target, false);
15816 /* For the following on big endian, it's ok to use any appropriate
15817 unaligned-supporting load, so use a generic expander. For
15818 little-endian, the exact element-reversing instruction must
15819 be used. */
15820 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15822 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15823 : CODE_FOR_vsx_ld_elemrev_v2df);
15824 return altivec_expand_lv_builtin (code, exp, target, false);
15826 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15828 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15829 : CODE_FOR_vsx_ld_elemrev_v2di);
15830 return altivec_expand_lv_builtin (code, exp, target, false);
15832 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15834 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15835 : CODE_FOR_vsx_ld_elemrev_v4sf);
15836 return altivec_expand_lv_builtin (code, exp, target, false);
15838 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15840 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15841 : CODE_FOR_vsx_ld_elemrev_v4si);
15842 return altivec_expand_lv_builtin (code, exp, target, false);
15844 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15846 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15847 : CODE_FOR_vsx_ld_elemrev_v8hi);
15848 return altivec_expand_lv_builtin (code, exp, target, false);
15850 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15852 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15853 : CODE_FOR_vsx_ld_elemrev_v16qi);
15854 return altivec_expand_lv_builtin (code, exp, target, false);
15856 break;
15857 default:
15858 break;
15859 /* Fall through. */
15862 *expandedp = false;
15863 return NULL_RTX;
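/* Editorial note (illustrative): the BYTES_BIG_ENDIAN selections above
   mean that a builtin mapped onto the LD_ELEMREV/ST_ELEMREV entries can
   use any unaligned-capable vsx_load_*/vsx_store_* pattern on big
   endian, while little endian must go through the element-reversing
   vsx_ld_elemrev_*/vsx_st_elemrev_* patterns so that the in-register
   element order matches the vector programming model.  */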
15866 /* Expand the builtin in EXP and store the result in TARGET. Store
15867 true in *EXPANDEDP if we found a builtin to expand. */
15868 static rtx
15869 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15871 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15872 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15873 const struct builtin_description *d;
15874 size_t i;
15876 *expandedp = true;
15878 switch (fcode)
15880 case PAIRED_BUILTIN_STX:
15881 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15882 case PAIRED_BUILTIN_LX:
15883 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15884 default:
15885 break;
15886 /* Fall through. */
15889 /* Expand the paired predicates. */
15890 d = bdesc_paired_preds;
15891 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15892 if (d->code == fcode)
15893 return paired_expand_predicate_builtin (d->icode, exp, target);
15895 *expandedp = false;
15896 return NULL_RTX;
15899 /* Binops that need to be initialized manually, but can be expanded
15900 automagically by rs6000_expand_binop_builtin. */
15901 static const struct builtin_description bdesc_2arg_spe[] =
15903 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15904 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15905 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15906 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15907 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15908 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15909 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15910 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15911 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15912 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15913 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15914 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15915 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15916 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15917 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15918 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15919 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15920 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15921 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15922 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15923 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15924 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15927 /* Expand the builtin in EXP and store the result in TARGET. Store
15928 true in *EXPANDEDP if we found a builtin to expand.
15930 This expands the SPE builtins that are not simple unary and binary
15931 operations. */
15932 static rtx
15933 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15935 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15936 tree arg1, arg0;
15937 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15938 enum insn_code icode;
15939 machine_mode tmode, mode0;
15940 rtx pat, op0;
15941 const struct builtin_description *d;
15942 size_t i;
15944 *expandedp = true;
15946 /* Syntax check for a 5-bit unsigned immediate. */
15947 switch (fcode)
15949 case SPE_BUILTIN_EVSTDD:
15950 case SPE_BUILTIN_EVSTDH:
15951 case SPE_BUILTIN_EVSTDW:
15952 case SPE_BUILTIN_EVSTWHE:
15953 case SPE_BUILTIN_EVSTWHO:
15954 case SPE_BUILTIN_EVSTWWE:
15955 case SPE_BUILTIN_EVSTWWO:
15956 arg1 = CALL_EXPR_ARG (exp, 2);
15957 if (TREE_CODE (arg1) != INTEGER_CST
15958 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15960 error ("argument 2 must be a 5-bit unsigned literal");
15961 return const0_rtx;
15963 break;
15964 default:
15965 break;
15968 /* The evsplat*i instructions are not quite generic. */
15969 switch (fcode)
15971 case SPE_BUILTIN_EVSPLATFI:
15972 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15973 exp, target);
15974 case SPE_BUILTIN_EVSPLATI:
15975 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15976 exp, target);
15977 default:
15978 break;
15981 d = bdesc_2arg_spe;
15982 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15983 if (d->code == fcode)
15984 return rs6000_expand_binop_builtin (d->icode, exp, target);
15986 d = bdesc_spe_predicates;
15987 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15988 if (d->code == fcode)
15989 return spe_expand_predicate_builtin (d->icode, exp, target);
15991 d = bdesc_spe_evsel;
15992 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15993 if (d->code == fcode)
15994 return spe_expand_evsel_builtin (d->icode, exp, target);
15996 switch (fcode)
15998 case SPE_BUILTIN_EVSTDDX:
15999 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16000 case SPE_BUILTIN_EVSTDHX:
16001 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16002 case SPE_BUILTIN_EVSTDWX:
16003 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16004 case SPE_BUILTIN_EVSTWHEX:
16005 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16006 case SPE_BUILTIN_EVSTWHOX:
16007 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16008 case SPE_BUILTIN_EVSTWWEX:
16009 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16010 case SPE_BUILTIN_EVSTWWOX:
16011 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16012 case SPE_BUILTIN_EVSTDD:
16013 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16014 case SPE_BUILTIN_EVSTDH:
16015 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16016 case SPE_BUILTIN_EVSTDW:
16017 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16018 case SPE_BUILTIN_EVSTWHE:
16019 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16020 case SPE_BUILTIN_EVSTWHO:
16021 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16022 case SPE_BUILTIN_EVSTWWE:
16023 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16024 case SPE_BUILTIN_EVSTWWO:
16025 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16026 case SPE_BUILTIN_MFSPEFSCR:
16027 icode = CODE_FOR_spe_mfspefscr;
16028 tmode = insn_data[icode].operand[0].mode;
16030 if (target == 0
16031 || GET_MODE (target) != tmode
16032 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16033 target = gen_reg_rtx (tmode);
16035 pat = GEN_FCN (icode) (target);
16036 if (! pat)
16037 return 0;
16038 emit_insn (pat);
16039 return target;
16040 case SPE_BUILTIN_MTSPEFSCR:
16041 icode = CODE_FOR_spe_mtspefscr;
16042 arg0 = CALL_EXPR_ARG (exp, 0);
16043 op0 = expand_normal (arg0);
16044 mode0 = insn_data[icode].operand[0].mode;
16046 if (arg0 == error_mark_node)
16047 return const0_rtx;
16049 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16050 op0 = copy_to_mode_reg (mode0, op0);
16052 pat = GEN_FCN (icode) (op0);
16053 if (pat)
16054 emit_insn (pat);
16055 return NULL_RTX;
16056 default:
16057 break;
16060 *expandedp = false;
16061 return NULL_RTX;
16064 static rtx
16065 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16067 rtx pat, scratch, tmp;
16068 tree form = CALL_EXPR_ARG (exp, 0);
16069 tree arg0 = CALL_EXPR_ARG (exp, 1);
16070 tree arg1 = CALL_EXPR_ARG (exp, 2);
16071 rtx op0 = expand_normal (arg0);
16072 rtx op1 = expand_normal (arg1);
16073 machine_mode mode0 = insn_data[icode].operand[1].mode;
16074 machine_mode mode1 = insn_data[icode].operand[2].mode;
16075 int form_int;
16076 enum rtx_code code;
16078 if (TREE_CODE (form) != INTEGER_CST)
16080 error ("argument 1 of __builtin_paired_predicate must be a constant");
16081 return const0_rtx;
16083 else
16084 form_int = TREE_INT_CST_LOW (form);
16086 gcc_assert (mode0 == mode1);
16088 if (arg0 == error_mark_node || arg1 == error_mark_node)
16089 return const0_rtx;
16091 if (target == 0
16092 || GET_MODE (target) != SImode
16093 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16094 target = gen_reg_rtx (SImode);
16095 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16096 op0 = copy_to_mode_reg (mode0, op0);
16097 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16098 op1 = copy_to_mode_reg (mode1, op1);
16100 scratch = gen_reg_rtx (CCFPmode);
16102 pat = GEN_FCN (icode) (scratch, op0, op1);
16103 if (!pat)
16104 return const0_rtx;
16106 emit_insn (pat);
16108 switch (form_int)
16110 /* LT bit. */
16111 case 0:
16112 code = LT;
16113 break;
16114 /* GT bit. */
16115 case 1:
16116 code = GT;
16117 break;
16118 /* EQ bit. */
16119 case 2:
16120 code = EQ;
16121 break;
16122 /* UN bit. */
16123 case 3:
16124 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16125 return target;
16126 default:
16127 error ("argument 1 of __builtin_paired_predicate is out of range");
16128 return const0_rtx;
16131 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16132 emit_move_insn (target, tmp);
16133 return target;
16136 static rtx
16137 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16139 rtx pat, scratch, tmp;
16140 tree form = CALL_EXPR_ARG (exp, 0);
16141 tree arg0 = CALL_EXPR_ARG (exp, 1);
16142 tree arg1 = CALL_EXPR_ARG (exp, 2);
16143 rtx op0 = expand_normal (arg0);
16144 rtx op1 = expand_normal (arg1);
16145 machine_mode mode0 = insn_data[icode].operand[1].mode;
16146 machine_mode mode1 = insn_data[icode].operand[2].mode;
16147 int form_int;
16148 enum rtx_code code;
16150 if (TREE_CODE (form) != INTEGER_CST)
16152 error ("argument 1 of __builtin_spe_predicate must be a constant");
16153 return const0_rtx;
16155 else
16156 form_int = TREE_INT_CST_LOW (form);
16158 gcc_assert (mode0 == mode1);
16160 if (arg0 == error_mark_node || arg1 == error_mark_node)
16161 return const0_rtx;
16163 if (target == 0
16164 || GET_MODE (target) != SImode
16165 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16166 target = gen_reg_rtx (SImode);
16168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16169 op0 = copy_to_mode_reg (mode0, op0);
16170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16171 op1 = copy_to_mode_reg (mode1, op1);
16173 scratch = gen_reg_rtx (CCmode);
16175 pat = GEN_FCN (icode) (scratch, op0, op1);
16176 if (! pat)
16177 return const0_rtx;
16178 emit_insn (pat);
16180 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16181 _lower_. We use one compare, but look in different bits of the
16182 CR for each variant.
16184 There are 2 elements in each SPE simd type (upper/lower). The CR
16185 bits are set as follows:
16187 BIT 0 | BIT 1 | BIT 2 | BIT 3
16188 U | L | (U | L) | (U & L)
16190 So, for an "all" relationship, BIT 3 would be set.
16191 For an "any" relationship, BIT 2 would be set. Etc.
16193 Following traditional nomenclature, these bits map to:
16195 BIT 0 | BIT 1 | BIT 2 | BIT 3
16196 LT | GT | EQ | OV
16198 Later, we will generate rtl to look in the OV/EQ/LT/GT bit for the
all/any/upper/lower variant, respectively.
16201 switch (form_int)
16203 /* All variant. OV bit. */
16204 case 0:
16205 /* We need to get to the OV bit, which is the ORDERED bit. We
16206 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16207 that's ugly and will make validate_condition_mode die.
16208 So let's just use another pattern. */
16209 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16210 return target;
16211 /* Any variant. EQ bit. */
16212 case 1:
16213 code = EQ;
16214 break;
16215 /* Upper variant. LT bit. */
16216 case 2:
16217 code = LT;
16218 break;
16219 /* Lower variant. GT bit. */
16220 case 3:
16221 code = GT;
16222 break;
16223 default:
16224 error ("argument 1 of __builtin_spe_predicate is out of range");
16225 return const0_rtx;
16228 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16229 emit_move_insn (target, tmp);
16231 return target;
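/* Editor's illustration (not part of GCC): given the CR layout in the
   comment above, a hypothetical SPE predicate built-in expanded here
   behaves like

     int all = __builtin_spe_pred (0, a, b);   -- OV bit: U & L
     int any = __builtin_spe_pred (1, a, b);   -- EQ bit: U | L
     int up  = __builtin_spe_pred (2, a, b);   -- LT bit: upper element
     int lo  = __builtin_spe_pred (3, a, b);   -- GT bit: lower element

   One compare is emitted; the form only selects which CR bit is
   extracted into the result.  */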
16234 /* The evsel builtins look like this:
16236 e = __builtin_spe_evsel_OP (a, b, c, d);
16238 and work like this:
16240 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16241 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16244 static rtx
16245 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16247 rtx pat, scratch;
16248 tree arg0 = CALL_EXPR_ARG (exp, 0);
16249 tree arg1 = CALL_EXPR_ARG (exp, 1);
16250 tree arg2 = CALL_EXPR_ARG (exp, 2);
16251 tree arg3 = CALL_EXPR_ARG (exp, 3);
16252 rtx op0 = expand_normal (arg0);
16253 rtx op1 = expand_normal (arg1);
16254 rtx op2 = expand_normal (arg2);
16255 rtx op3 = expand_normal (arg3);
16256 machine_mode mode0 = insn_data[icode].operand[1].mode;
16257 machine_mode mode1 = insn_data[icode].operand[2].mode;
16259 gcc_assert (mode0 == mode1);
16261 if (arg0 == error_mark_node || arg1 == error_mark_node
16262 || arg2 == error_mark_node || arg3 == error_mark_node)
16263 return const0_rtx;
16265 if (target == 0
16266 || GET_MODE (target) != mode0
16267 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16268 target = gen_reg_rtx (mode0);
16270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16271 op0 = copy_to_mode_reg (mode0, op0);
16272 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16273 op1 = copy_to_mode_reg (mode0, op1);
16274 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16275 op2 = copy_to_mode_reg (mode0, op2);
16276 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16277 op3 = copy_to_mode_reg (mode0, op3);
16279 /* Generate the compare. */
16280 scratch = gen_reg_rtx (CCmode);
16281 pat = GEN_FCN (icode) (scratch, op0, op1);
16282 if (! pat)
16283 return const0_rtx;
16284 emit_insn (pat);
16286 if (mode0 == V2SImode)
16287 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16288 else
16289 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16291 return target;
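/* Worked example (editor's note): with a greater-than compare and
   a = {5, 1}, b = {3, 4}, c = {10, 20}, d = {30, 40}, the evsel above
   yields e = {10, 40}: the upper compare 5 > 3 holds, selecting
   c[upper]; the lower compare 1 > 4 fails, selecting d[lower].  */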
16294 /* Raise an error message for a builtin function that is called without the
16295 appropriate target options being set. */
16297 static void
16298 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16300 size_t uns_fncode = (size_t)fncode;
16301 const char *name = rs6000_builtin_info[uns_fncode].name;
16302 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16304 gcc_assert (name != NULL);
16305 if ((fnmask & RS6000_BTM_CELL) != 0)
16306 error ("Builtin function %s is only valid for the cell processor", name);
16307 else if ((fnmask & RS6000_BTM_VSX) != 0)
16308 error ("Builtin function %s requires the -mvsx option", name);
16309 else if ((fnmask & RS6000_BTM_HTM) != 0)
16310 error ("Builtin function %s requires the -mhtm option", name);
16311 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16312 error ("Builtin function %s requires the -maltivec option", name);
16313 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16314 error ("Builtin function %s requires the -mpaired option", name);
16315 else if ((fnmask & RS6000_BTM_SPE) != 0)
16316 error ("Builtin function %s requires the -mspe option", name);
16317 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16318 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16319 error ("Builtin function %s requires the -mhard-dfp and"
16320 " -mpower8-vector options", name);
16321 else if ((fnmask & RS6000_BTM_DFP) != 0)
16322 error ("Builtin function %s requires the -mhard-dfp option", name);
16323 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16324 error ("Builtin function %s requires the -mpower8-vector option", name);
16325 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16326 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16327 error ("Builtin function %s requires the -mcpu=power9 and"
16328 " -m64 options", name);
16329 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16330 error ("Builtin function %s requires the -mcpu=power9 option", name);
16331 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16332 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16333 error ("Builtin function %s requires the -mcpu=power9 and"
16334 " -m64 options", name);
16335 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16336 error ("Builtin function %s requires the -mcpu=power9 option", name);
16337 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16338 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16339 error ("Builtin function %s requires the -mhard-float and"
16340 " -mlong-double-128 options", name);
16341 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16342 error ("Builtin function %s requires the -mhard-float option", name);
16343 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16344 error ("Builtin function %s requires the -mfloat128 option", name);
16345 else
16346 error ("Builtin function %s is not supported with the current options",
16347 name);
16350 /* Target hook for early folding of built-ins, shamelessly stolen
16351 from ia64.c. */
16353 static tree
16354 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16355 tree *args, bool ignore ATTRIBUTE_UNUSED)
16357 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16359 enum rs6000_builtins fn_code
16360 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16361 switch (fn_code)
16363 case RS6000_BUILTIN_NANQ:
16364 case RS6000_BUILTIN_NANSQ:
16366 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16367 const char *str = c_getstr (*args);
16368 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16369 REAL_VALUE_TYPE real;
16371 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16372 return build_real (type, real);
16373 return NULL_TREE;
16375 case RS6000_BUILTIN_INFQ:
16376 case RS6000_BUILTIN_HUGE_VALQ:
16378 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16379 REAL_VALUE_TYPE inf;
16380 real_inf (&inf);
16381 return build_real (type, inf);
16383 default:
16384 break;
16387 #ifdef SUBTARGET_FOLD_BUILTIN
16388 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16389 #else
16390 return NULL_TREE;
16391 #endif
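/* Example of the folding above (editor's note): a call such as

     __float128 q = __builtin_nanq ("");

   is folded at GENERIC level into a REAL_CST quiet NaN, so no call
   survives into the generated code; __builtin_nansq ("") likewise
   folds to a signaling NaN, and __builtin_infq () to +Inf.  */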
16394 /* Expand an expression EXP that calls a built-in function,
16395 with result going to TARGET if that's convenient
16396 (and in mode MODE if that's convenient).
16397 SUBTARGET may be used as the target for computing one of EXP's operands.
16398 IGNORE is nonzero if the value is to be ignored. */
16400 static rtx
16401 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16402 machine_mode mode ATTRIBUTE_UNUSED,
16403 int ignore ATTRIBUTE_UNUSED)
16405 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16406 enum rs6000_builtins fcode
16407 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16408 size_t uns_fcode = (size_t)fcode;
16409 const struct builtin_description *d;
16410 size_t i;
16411 rtx ret;
16412 bool success;
16413 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16414 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16416 if (TARGET_DEBUG_BUILTIN)
16418 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16419 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16420 const char *name2 = ((icode != CODE_FOR_nothing)
16421 ? get_insn_name ((int)icode)
16422 : "nothing");
16423 const char *name3;
16425 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16427 default: name3 = "unknown"; break;
16428 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16429 case RS6000_BTC_UNARY: name3 = "unary"; break;
16430 case RS6000_BTC_BINARY: name3 = "binary"; break;
16431 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16432 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16433 case RS6000_BTC_ABS: name3 = "abs"; break;
16434 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16435 case RS6000_BTC_DST: name3 = "dst"; break;
16439 fprintf (stderr,
16440 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16441 (name1) ? name1 : "---", fcode,
16442 (name2) ? name2 : "---", (int)icode,
16443 name3,
16444 func_valid_p ? "" : ", not valid");
16447 if (!func_valid_p)
16449 rs6000_invalid_builtin (fcode);
16451 /* Given it is invalid, just generate a normal call. */
16452 return expand_call (exp, target, ignore);
16455 switch (fcode)
16457 case RS6000_BUILTIN_RECIP:
16458 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16460 case RS6000_BUILTIN_RECIPF:
16461 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16463 case RS6000_BUILTIN_RSQRTF:
16464 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16466 case RS6000_BUILTIN_RSQRT:
16467 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16469 case POWER7_BUILTIN_BPERMD:
16470 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16471 ? CODE_FOR_bpermd_di
16472 : CODE_FOR_bpermd_si), exp, target);
16474 case RS6000_BUILTIN_GET_TB:
16475 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16476 target);
16478 case RS6000_BUILTIN_MFTB:
16479 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16480 ? CODE_FOR_rs6000_mftb_di
16481 : CODE_FOR_rs6000_mftb_si),
16482 target);
16484 case RS6000_BUILTIN_MFFS:
16485 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16487 case RS6000_BUILTIN_MTFSF:
16488 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16490 case RS6000_BUILTIN_CPU_INIT:
16491 case RS6000_BUILTIN_CPU_IS:
16492 case RS6000_BUILTIN_CPU_SUPPORTS:
16493 return cpu_expand_builtin (fcode, exp, target);
16495 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16496 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16498 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16499 : (int) CODE_FOR_altivec_lvsl_direct);
16500 machine_mode tmode = insn_data[icode].operand[0].mode;
16501 machine_mode mode = insn_data[icode].operand[1].mode;
16502 tree arg;
16503 rtx op, addr, pat;
16505 gcc_assert (TARGET_ALTIVEC);
16507 arg = CALL_EXPR_ARG (exp, 0);
16508 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16509 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16510 addr = memory_address (mode, op);
16511 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16512 op = addr;
16513 else
16515 /* For the load case we need to negate the address. */
16516 op = gen_reg_rtx (GET_MODE (addr));
16517 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16519 op = gen_rtx_MEM (mode, op);
16521 if (target == 0
16522 || GET_MODE (target) != tmode
16523 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16524 target = gen_reg_rtx (tmode);
16526 pat = GEN_FCN (icode) (target, op);
16527 if (!pat)
16528 return 0;
16529 emit_insn (pat);
16531 return target;
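/* Editor's note: the negated address feeds the usual misaligned-load
   recipe, where the permute mask is lvsr (0 - addr) on big-endian
   targets (lvsl otherwise) and a vperm of the two straddling aligned
   quadwords reassembles the unaligned vector.  */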
16534 case ALTIVEC_BUILTIN_VCFUX:
16535 case ALTIVEC_BUILTIN_VCFSX:
16536 case ALTIVEC_BUILTIN_VCTUXS:
16537 case ALTIVEC_BUILTIN_VCTSXS:
16538 /* FIXME: There's got to be a nicer way to handle this case than
16539 constructing a new CALL_EXPR. */
16540 if (call_expr_nargs (exp) == 1)
16542 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16543 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16545 break;
16547 default:
16548 break;
16551 if (TARGET_ALTIVEC)
16553 ret = altivec_expand_builtin (exp, target, &success);
16555 if (success)
16556 return ret;
16558 if (TARGET_SPE)
16560 ret = spe_expand_builtin (exp, target, &success);
16562 if (success)
16563 return ret;
16565 if (TARGET_PAIRED_FLOAT)
16567 ret = paired_expand_builtin (exp, target, &success);
16569 if (success)
16570 return ret;
16572 if (TARGET_HTM)
16574 ret = htm_expand_builtin (exp, target, &success);
16576 if (success)
16577 return ret;
16580 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16581 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16582 gcc_assert (attr == RS6000_BTC_UNARY
16583 || attr == RS6000_BTC_BINARY
16584 || attr == RS6000_BTC_TERNARY
16585 || attr == RS6000_BTC_SPECIAL);
16587 /* Handle simple unary operations. */
16588 d = bdesc_1arg;
16589 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16590 if (d->code == fcode)
16591 return rs6000_expand_unop_builtin (d->icode, exp, target);
16593 /* Handle simple binary operations. */
16594 d = bdesc_2arg;
16595 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16596 if (d->code == fcode)
16597 return rs6000_expand_binop_builtin (d->icode, exp, target);
16599 /* Handle simple ternary operations. */
16600 d = bdesc_3arg;
16601 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16602 if (d->code == fcode)
16603 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16605 /* Handle simple no-argument operations. */
16606 d = bdesc_0arg;
16607 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16608 if (d->code == fcode)
16609 return rs6000_expand_zeroop_builtin (d->icode, target);
16611 gcc_unreachable ();
16614 static void
16615 rs6000_init_builtins (void)
16617 tree tdecl;
16618 tree ftype;
16619 machine_mode mode;
16621 if (TARGET_DEBUG_BUILTIN)
16622 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16623 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16624 (TARGET_SPE) ? ", spe" : "",
16625 (TARGET_ALTIVEC) ? ", altivec" : "",
16626 (TARGET_VSX) ? ", vsx" : "");
16628 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16629 V2SF_type_node = build_vector_type (float_type_node, 2);
16630 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16631 V2DF_type_node = build_vector_type (double_type_node, 2);
16632 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16633 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16634 V4SF_type_node = build_vector_type (float_type_node, 4);
16635 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16636 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16638 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16639 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16640 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16641 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16643 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16644 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16645 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16646 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16648 const_str_type_node
16649 = build_pointer_type (build_qualified_type (char_type_node,
16650 TYPE_QUAL_CONST));
16652 /* We use V1TI mode as a special container to hold __int128_t items that
16653 must live in VSX registers. */
16654 if (intTI_type_node)
16656 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16657 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
16660 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16661 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16662 'vector unsigned short'. */
16664 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16665 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16666 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16667 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16668 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16670 long_integer_type_internal_node = long_integer_type_node;
16671 long_unsigned_type_internal_node = long_unsigned_type_node;
16672 long_long_integer_type_internal_node = long_long_integer_type_node;
16673 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16674 intQI_type_internal_node = intQI_type_node;
16675 uintQI_type_internal_node = unsigned_intQI_type_node;
16676 intHI_type_internal_node = intHI_type_node;
16677 uintHI_type_internal_node = unsigned_intHI_type_node;
16678 intSI_type_internal_node = intSI_type_node;
16679 uintSI_type_internal_node = unsigned_intSI_type_node;
16680 intDI_type_internal_node = intDI_type_node;
16681 uintDI_type_internal_node = unsigned_intDI_type_node;
16682 intTI_type_internal_node = intTI_type_node;
16683 uintTI_type_internal_node = unsigned_intTI_type_node;
16684 float_type_internal_node = float_type_node;
16685 double_type_internal_node = double_type_node;
16686 long_double_type_internal_node = long_double_type_node;
16687 dfloat64_type_internal_node = dfloat64_type_node;
16688 dfloat128_type_internal_node = dfloat128_type_node;
16689 void_type_internal_node = void_type_node;
16691 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16692 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16693 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16694 format that uses a pair of doubles, depending on the switches and
16695 defaults.
16697 We do not enable the actual __float128 keyword unless the user explicitly
16698 asks for it, because the library support is not yet complete.
16700 If we don't support either 128-bit IBM double double or IEEE 128-bit
16701 floating point, we need to make sure the type is non-zero or else the
16702 self-test fails during bootstrap.
16704 We don't register a built-in type for __ibm128 if the type is the same as
16705 long double. Instead, rs6000_cpu_cpp_builtins adds a #define mapping
16706 __ibm128 to long double. */
16707 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16709 ibm128_float_type_node = make_node (REAL_TYPE);
16710 TYPE_PRECISION (ibm128_float_type_node) = 128;
16711 layout_type (ibm128_float_type_node);
16712 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16714 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16715 "__ibm128");
16717 else
16718 ibm128_float_type_node = long_double_type_node;
16720 if (TARGET_FLOAT128_KEYWORD)
16722 ieee128_float_type_node = float128_type_node;
16723 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16724 "__float128");
16727 else if (TARGET_FLOAT128_TYPE)
16729 ieee128_float_type_node = make_node (REAL_TYPE);
16730 TYPE_PRECISION (ieee128_float_type_node) = 128;
16731 layout_type (ieee128_float_type_node);
16732 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16734 /* If we are not exporting the __float128/_Float128 keywords, we need a
16735 keyword to get the types created. Use __ieee128 as the dummy
16736 keyword. */
16737 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16738 "__ieee128");
16741 else
16742 ieee128_float_type_node = long_double_type_node;
16744 /* Initialize the modes for builtin_function_type, mapping a machine mode
16745 to its tree type node. */
16746 builtin_mode_to_type[QImode][0] = integer_type_node;
16747 builtin_mode_to_type[HImode][0] = integer_type_node;
16748 builtin_mode_to_type[SImode][0] = intSI_type_node;
16749 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16750 builtin_mode_to_type[DImode][0] = intDI_type_node;
16751 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16752 builtin_mode_to_type[TImode][0] = intTI_type_node;
16753 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16754 builtin_mode_to_type[SFmode][0] = float_type_node;
16755 builtin_mode_to_type[DFmode][0] = double_type_node;
16756 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16757 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16758 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16759 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16760 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16761 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16762 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16763 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16764 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16765 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16766 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16767 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16768 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16769 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16770 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16771 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16772 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16773 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16774 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16775 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16777 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16778 TYPE_NAME (bool_char_type_node) = tdecl;
16780 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16781 TYPE_NAME (bool_short_type_node) = tdecl;
16783 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16784 TYPE_NAME (bool_int_type_node) = tdecl;
16786 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16787 TYPE_NAME (pixel_type_node) = tdecl;
16789 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16790 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16791 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16792 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16793 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16795 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16796 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16798 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16799 TYPE_NAME (V16QI_type_node) = tdecl;
16801 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
16802 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16804 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16805 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16807 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16808 TYPE_NAME (V8HI_type_node) = tdecl;
16810 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16811 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16813 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16814 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16816 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16817 TYPE_NAME (V4SI_type_node) = tdecl;
16819 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16820 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16822 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16823 TYPE_NAME (V4SF_type_node) = tdecl;
16825 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16826 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16828 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16829 TYPE_NAME (V2DF_type_node) = tdecl;
16831 if (TARGET_POWERPC64)
16833 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16834 TYPE_NAME (V2DI_type_node) = tdecl;
16836 tdecl = add_builtin_type ("__vector unsigned long",
16837 unsigned_V2DI_type_node);
16838 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16840 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16841 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16843 else
16845 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16846 TYPE_NAME (V2DI_type_node) = tdecl;
16848 tdecl = add_builtin_type ("__vector unsigned long long",
16849 unsigned_V2DI_type_node);
16850 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16852 tdecl = add_builtin_type ("__vector __bool long long",
16853 bool_V2DI_type_node);
16854 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16857 if (V1TI_type_node)
16859 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16860 TYPE_NAME (V1TI_type_node) = tdecl;
16862 tdecl = add_builtin_type ("__vector unsigned __int128",
16863 unsigned_V1TI_type_node);
16864 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16867 /* Paired and SPE builtins are only available if you build a compiler with
16868 the appropriate options, so only create those builtins when the
16869 corresponding option is enabled. Create Altivec and VSX builtins on
16870 machines with at least the general purpose extensions (970 and newer) to
16871 allow the use of the target attribute. */
16872 if (TARGET_PAIRED_FLOAT)
16873 paired_init_builtins ();
16874 if (TARGET_SPE)
16875 spe_init_builtins ();
16876 if (TARGET_EXTRA_BUILTINS)
16877 altivec_init_builtins ();
16878 if (TARGET_HTM)
16879 htm_init_builtins ();
16881 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16882 rs6000_common_init_builtins ();
16884 ftype = build_function_type_list (ieee128_float_type_node,
16885 const_str_type_node, NULL_TREE);
16886 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16887 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16889 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16890 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16891 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16893 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16894 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16895 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16897 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16898 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16899 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16901 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16902 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16903 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16905 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16906 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16907 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16909 mode = (TARGET_64BIT) ? DImode : SImode;
16910 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16911 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16912 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16914 ftype = build_function_type_list (unsigned_intDI_type_node,
16915 NULL_TREE);
16916 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16918 if (TARGET_64BIT)
16919 ftype = build_function_type_list (unsigned_intDI_type_node,
16920 NULL_TREE);
16921 else
16922 ftype = build_function_type_list (unsigned_intSI_type_node,
16923 NULL_TREE);
16924 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16926 ftype = build_function_type_list (double_type_node, NULL_TREE);
16927 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16929 ftype = build_function_type_list (void_type_node,
16930 intSI_type_node, double_type_node,
16931 NULL_TREE);
16932 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16934 ftype = build_function_type_list (void_type_node, NULL_TREE);
16935 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16937 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16938 NULL_TREE);
16939 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16940 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16942 #if TARGET_XCOFF
16943 /* AIX libm provides clog as __clog. */
16944 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16945 set_user_assembler_name (tdecl, "__clog");
16946 #endif
16948 #ifdef SUBTARGET_INIT_BUILTINS
16949 SUBTARGET_INIT_BUILTINS;
16950 #endif
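/* Quick illustration (editor's note): once this initialization has run,
   a translation unit compiled with -maltivec can use the type names
   registered above directly, e.g.

     __vector unsigned int v = { 1, 2, 3, 4 };
     __vector __bool int   m;
     __vector __pixel      p;

   and generic built-ins such as __builtin_vec_ld resolve against the
   matching type nodes.  */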
16953 /* Returns the rs6000 builtin decl for CODE. */
16955 static tree
16956 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16958 HOST_WIDE_INT fnmask;
16960 if (code >= RS6000_BUILTIN_COUNT)
16961 return error_mark_node;
16963 fnmask = rs6000_builtin_info[code].mask;
16964 if ((fnmask & rs6000_builtin_mask) != fnmask)
16966 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16967 return error_mark_node;
16970 return rs6000_builtin_decls[code];
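/* Editor's sketch: the middle end reaches this through the target hook,
   e.g.

     tree decl = targetm.builtin_decl (RS6000_BUILTIN_RSQRT, true);
     if (decl != error_mark_node)
       ... emit a call to decl ...

   so returning error_mark_node is how a built-in that is unavailable
   under the current options is refused.  */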
16973 static void
16974 spe_init_builtins (void)
16976 tree puint_type_node = build_pointer_type (unsigned_type_node);
16977 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16978 const struct builtin_description *d;
16979 size_t i;
16980 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16982 tree v2si_ftype_4_v2si
16983 = build_function_type_list (opaque_V2SI_type_node,
16984 opaque_V2SI_type_node,
16985 opaque_V2SI_type_node,
16986 opaque_V2SI_type_node,
16987 opaque_V2SI_type_node,
16988 NULL_TREE);
16990 tree v2sf_ftype_4_v2sf
16991 = build_function_type_list (opaque_V2SF_type_node,
16992 opaque_V2SF_type_node,
16993 opaque_V2SF_type_node,
16994 opaque_V2SF_type_node,
16995 opaque_V2SF_type_node,
16996 NULL_TREE);
16998 tree int_ftype_int_v2si_v2si
16999 = build_function_type_list (integer_type_node,
17000 integer_type_node,
17001 opaque_V2SI_type_node,
17002 opaque_V2SI_type_node,
17003 NULL_TREE);
17005 tree int_ftype_int_v2sf_v2sf
17006 = build_function_type_list (integer_type_node,
17007 integer_type_node,
17008 opaque_V2SF_type_node,
17009 opaque_V2SF_type_node,
17010 NULL_TREE);
17012 tree void_ftype_v2si_puint_int
17013 = build_function_type_list (void_type_node,
17014 opaque_V2SI_type_node,
17015 puint_type_node,
17016 integer_type_node,
17017 NULL_TREE);
17019 tree void_ftype_v2si_puint_char
17020 = build_function_type_list (void_type_node,
17021 opaque_V2SI_type_node,
17022 puint_type_node,
17023 char_type_node,
17024 NULL_TREE);
17026 tree void_ftype_v2si_pv2si_int
17027 = build_function_type_list (void_type_node,
17028 opaque_V2SI_type_node,
17029 opaque_p_V2SI_type_node,
17030 integer_type_node,
17031 NULL_TREE);
17033 tree void_ftype_v2si_pv2si_char
17034 = build_function_type_list (void_type_node,
17035 opaque_V2SI_type_node,
17036 opaque_p_V2SI_type_node,
17037 char_type_node,
17038 NULL_TREE);
17040 tree void_ftype_int
17041 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17043 tree int_ftype_void
17044 = build_function_type_list (integer_type_node, NULL_TREE);
17046 tree v2si_ftype_pv2si_int
17047 = build_function_type_list (opaque_V2SI_type_node,
17048 opaque_p_V2SI_type_node,
17049 integer_type_node,
17050 NULL_TREE);
17052 tree v2si_ftype_puint_int
17053 = build_function_type_list (opaque_V2SI_type_node,
17054 puint_type_node,
17055 integer_type_node,
17056 NULL_TREE);
17058 tree v2si_ftype_pushort_int
17059 = build_function_type_list (opaque_V2SI_type_node,
17060 pushort_type_node,
17061 integer_type_node,
17062 NULL_TREE);
17064 tree v2si_ftype_signed_char
17065 = build_function_type_list (opaque_V2SI_type_node,
17066 signed_char_type_node,
17067 NULL_TREE);
17069 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17071 /* Initialize irregular SPE builtins. */
17073 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17074 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17075 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17076 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17077 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17078 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17079 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17080 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17081 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17082 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17083 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
17084 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
17085 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
17086 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
17087 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
17088 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
17089 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
17090 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
17092 /* Loads. */
17093 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
17094 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
17095 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
17096 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
17097 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
17098 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
17099 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
17100 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17101 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17102 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17103 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17104 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17105 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17106 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17107 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17108 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17109 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17110 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17111 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17112 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17113 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17114 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
17116 /* Predicates. */
17117 d = bdesc_spe_predicates;
17118 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17120 tree type;
17121 HOST_WIDE_INT mask = d->mask;
17123 if ((mask & builtin_mask) != mask)
17125 if (TARGET_DEBUG_BUILTIN)
17126 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
17127 d->name);
17128 continue;
17131 switch (insn_data[d->icode].operand[1].mode)
17133 case V2SImode:
17134 type = int_ftype_int_v2si_v2si;
17135 break;
17136 case V2SFmode:
17137 type = int_ftype_int_v2sf_v2sf;
17138 break;
17139 default:
17140 gcc_unreachable ();
17143 def_builtin (d->name, type, d->code);
17146 /* Evsel predicates. */
17147 d = bdesc_spe_evsel;
17148 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17150 tree type;
17151 HOST_WIDE_INT mask = d->mask;
17153 if ((mask & builtin_mask) != mask)
17155 if (TARGET_DEBUG_BUILTIN)
17156 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
17157 d->name);
17158 continue;
17161 switch (insn_data[d->icode].operand[1].mode)
17163 case V2SImode:
17164 type = v2si_ftype_4_v2si;
17165 break;
17166 case V2SFmode:
17167 type = v2sf_ftype_4_v2sf;
17168 break;
17169 default:
17170 gcc_unreachable ();
17173 def_builtin (d->name, type, d->code);
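/* Usage sketch (editor's note, requires -mspe): the opaque V2SI type
   registered above as __ev64_opaque__ is the operand type of these
   built-ins, e.g.

     __ev64_opaque__ a = __builtin_spe_evsplati (5);
     __ev64_opaque__ b = __builtin_spe_evlddx (p, 0);

   where p has type __ev64_opaque__ *.  */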
17177 static void
17178 paired_init_builtins (void)
17180 const struct builtin_description *d;
17181 size_t i;
17182 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17184 tree int_ftype_int_v2sf_v2sf
17185 = build_function_type_list (integer_type_node,
17186 integer_type_node,
17187 V2SF_type_node,
17188 V2SF_type_node,
17189 NULL_TREE);
17190 tree pcfloat_type_node =
17191 build_pointer_type (build_qualified_type
17192 (float_type_node, TYPE_QUAL_CONST));
17194 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17195 long_integer_type_node,
17196 pcfloat_type_node,
17197 NULL_TREE);
17198 tree void_ftype_v2sf_long_pcfloat =
17199 build_function_type_list (void_type_node,
17200 V2SF_type_node,
17201 long_integer_type_node,
17202 pcfloat_type_node,
17203 NULL_TREE);
17206 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17207 PAIRED_BUILTIN_LX);
17210 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17211 PAIRED_BUILTIN_STX);
17213 /* Predicates. */
17214 d = bdesc_paired_preds;
17215 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17217 tree type;
17218 HOST_WIDE_INT mask = d->mask;
17220 if ((mask & builtin_mask) != mask)
17222 if (TARGET_DEBUG_BUILTIN)
17223 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17224 d->name);
17225 continue;
17228 if (TARGET_DEBUG_BUILTIN)
17229 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17230 (int)i, get_insn_name (d->icode), (int)d->icode,
17231 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17233 switch (insn_data[d->icode].operand[1].mode)
17235 case V2SFmode:
17236 type = int_ftype_int_v2sf_v2sf;
17237 break;
17238 default:
17239 gcc_unreachable ();
17242 def_builtin (d->name, type, d->code);
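/* Usage sketch (editor's note, requires -mpaired): the two entry points
   above load and store a two-float pair, e.g.

     float buf[2] = { 1.0f, 2.0f };
     v = __builtin_paired_lx (0, buf);
     __builtin_paired_stx (v, 0, buf);

   where v has the 2-element V2SF vector type; its exact user-visible
   spelling is assumed here for illustration.  */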
17246 static void
17247 altivec_init_builtins (void)
17249 const struct builtin_description *d;
17250 size_t i;
17251 tree ftype;
17252 tree decl;
17253 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17255 tree pvoid_type_node = build_pointer_type (void_type_node);
17257 tree pcvoid_type_node
17258 = build_pointer_type (build_qualified_type (void_type_node,
17259 TYPE_QUAL_CONST));
17261 tree int_ftype_opaque
17262 = build_function_type_list (integer_type_node,
17263 opaque_V4SI_type_node, NULL_TREE);
17264 tree opaque_ftype_opaque
17265 = build_function_type_list (integer_type_node, NULL_TREE);
17266 tree opaque_ftype_opaque_int
17267 = build_function_type_list (opaque_V4SI_type_node,
17268 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17269 tree opaque_ftype_opaque_opaque_int
17270 = build_function_type_list (opaque_V4SI_type_node,
17271 opaque_V4SI_type_node, opaque_V4SI_type_node,
17272 integer_type_node, NULL_TREE);
17273 tree opaque_ftype_opaque_opaque_opaque
17274 = build_function_type_list (opaque_V4SI_type_node,
17275 opaque_V4SI_type_node, opaque_V4SI_type_node,
17276 opaque_V4SI_type_node, NULL_TREE);
17277 tree opaque_ftype_opaque_opaque
17278 = build_function_type_list (opaque_V4SI_type_node,
17279 opaque_V4SI_type_node, opaque_V4SI_type_node,
17280 NULL_TREE);
17281 tree int_ftype_int_opaque_opaque
17282 = build_function_type_list (integer_type_node,
17283 integer_type_node, opaque_V4SI_type_node,
17284 opaque_V4SI_type_node, NULL_TREE);
17285 tree int_ftype_int_v4si_v4si
17286 = build_function_type_list (integer_type_node,
17287 integer_type_node, V4SI_type_node,
17288 V4SI_type_node, NULL_TREE);
17289 tree int_ftype_int_v2di_v2di
17290 = build_function_type_list (integer_type_node,
17291 integer_type_node, V2DI_type_node,
17292 V2DI_type_node, NULL_TREE);
17293 tree void_ftype_v4si
17294 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17295 tree v8hi_ftype_void
17296 = build_function_type_list (V8HI_type_node, NULL_TREE);
17297 tree void_ftype_void
17298 = build_function_type_list (void_type_node, NULL_TREE);
17299 tree void_ftype_int
17300 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17302 tree opaque_ftype_long_pcvoid
17303 = build_function_type_list (opaque_V4SI_type_node,
17304 long_integer_type_node, pcvoid_type_node,
17305 NULL_TREE);
17306 tree v16qi_ftype_long_pcvoid
17307 = build_function_type_list (V16QI_type_node,
17308 long_integer_type_node, pcvoid_type_node,
17309 NULL_TREE);
17310 tree v8hi_ftype_long_pcvoid
17311 = build_function_type_list (V8HI_type_node,
17312 long_integer_type_node, pcvoid_type_node,
17313 NULL_TREE);
17314 tree v4si_ftype_long_pcvoid
17315 = build_function_type_list (V4SI_type_node,
17316 long_integer_type_node, pcvoid_type_node,
17317 NULL_TREE);
17318 tree v4sf_ftype_long_pcvoid
17319 = build_function_type_list (V4SF_type_node,
17320 long_integer_type_node, pcvoid_type_node,
17321 NULL_TREE);
17322 tree v2df_ftype_long_pcvoid
17323 = build_function_type_list (V2DF_type_node,
17324 long_integer_type_node, pcvoid_type_node,
17325 NULL_TREE);
17326 tree v2di_ftype_long_pcvoid
17327 = build_function_type_list (V2DI_type_node,
17328 long_integer_type_node, pcvoid_type_node,
17329 NULL_TREE);
17331 tree void_ftype_opaque_long_pvoid
17332 = build_function_type_list (void_type_node,
17333 opaque_V4SI_type_node, long_integer_type_node,
17334 pvoid_type_node, NULL_TREE);
17335 tree void_ftype_v4si_long_pvoid
17336 = build_function_type_list (void_type_node,
17337 V4SI_type_node, long_integer_type_node,
17338 pvoid_type_node, NULL_TREE);
17339 tree void_ftype_v16qi_long_pvoid
17340 = build_function_type_list (void_type_node,
17341 V16QI_type_node, long_integer_type_node,
17342 pvoid_type_node, NULL_TREE);
17344 tree void_ftype_v16qi_pvoid_long
17345 = build_function_type_list (void_type_node,
17346 V16QI_type_node, pvoid_type_node,
17347 long_integer_type_node, NULL_TREE);
17349 tree void_ftype_v8hi_long_pvoid
17350 = build_function_type_list (void_type_node,
17351 V8HI_type_node, long_integer_type_node,
17352 pvoid_type_node, NULL_TREE);
17353 tree void_ftype_v4sf_long_pvoid
17354 = build_function_type_list (void_type_node,
17355 V4SF_type_node, long_integer_type_node,
17356 pvoid_type_node, NULL_TREE);
17357 tree void_ftype_v2df_long_pvoid
17358 = build_function_type_list (void_type_node,
17359 V2DF_type_node, long_integer_type_node,
17360 pvoid_type_node, NULL_TREE);
17361 tree void_ftype_v2di_long_pvoid
17362 = build_function_type_list (void_type_node,
17363 V2DI_type_node, long_integer_type_node,
17364 pvoid_type_node, NULL_TREE);
17365 tree int_ftype_int_v8hi_v8hi
17366 = build_function_type_list (integer_type_node,
17367 integer_type_node, V8HI_type_node,
17368 V8HI_type_node, NULL_TREE);
17369 tree int_ftype_int_v16qi_v16qi
17370 = build_function_type_list (integer_type_node,
17371 integer_type_node, V16QI_type_node,
17372 V16QI_type_node, NULL_TREE);
17373 tree int_ftype_int_v4sf_v4sf
17374 = build_function_type_list (integer_type_node,
17375 integer_type_node, V4SF_type_node,
17376 V4SF_type_node, NULL_TREE);
17377 tree int_ftype_int_v2df_v2df
17378 = build_function_type_list (integer_type_node,
17379 integer_type_node, V2DF_type_node,
17380 V2DF_type_node, NULL_TREE);
17381 tree v2di_ftype_v2di
17382 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17383 tree v4si_ftype_v4si
17384 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17385 tree v8hi_ftype_v8hi
17386 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17387 tree v16qi_ftype_v16qi
17388 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17389 tree v4sf_ftype_v4sf
17390 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17391 tree v2df_ftype_v2df
17392 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17393 tree void_ftype_pcvoid_int_int
17394 = build_function_type_list (void_type_node,
17395 pcvoid_type_node, integer_type_node,
17396 integer_type_node, NULL_TREE);
17398 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17399 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17400 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17401 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17402 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17403 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17404 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17405 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17406 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17407 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17408 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17409 ALTIVEC_BUILTIN_LVXL_V2DF);
17410 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17411 ALTIVEC_BUILTIN_LVXL_V2DI);
17412 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17413 ALTIVEC_BUILTIN_LVXL_V4SF);
17414 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17415 ALTIVEC_BUILTIN_LVXL_V4SI);
17416 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17417 ALTIVEC_BUILTIN_LVXL_V8HI);
17418 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17419 ALTIVEC_BUILTIN_LVXL_V16QI);
17420 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17421 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17422 ALTIVEC_BUILTIN_LVX_V2DF);
17423 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17424 ALTIVEC_BUILTIN_LVX_V2DI);
17425 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17426 ALTIVEC_BUILTIN_LVX_V4SF);
17427 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17428 ALTIVEC_BUILTIN_LVX_V4SI);
17429 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17430 ALTIVEC_BUILTIN_LVX_V8HI);
17431 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17432 ALTIVEC_BUILTIN_LVX_V16QI);
17433 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17434 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17435 ALTIVEC_BUILTIN_STVX_V2DF);
17436 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17437 ALTIVEC_BUILTIN_STVX_V2DI);
17438 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17439 ALTIVEC_BUILTIN_STVX_V4SF);
17440 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17441 ALTIVEC_BUILTIN_STVX_V4SI);
17442 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17443 ALTIVEC_BUILTIN_STVX_V8HI);
17444 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17445 ALTIVEC_BUILTIN_STVX_V16QI);
17446 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17447 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17448 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17449 ALTIVEC_BUILTIN_STVXL_V2DF);
17450 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17451 ALTIVEC_BUILTIN_STVXL_V2DI);
17452 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17453 ALTIVEC_BUILTIN_STVXL_V4SF);
17454 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17455 ALTIVEC_BUILTIN_STVXL_V4SI);
17456 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17457 ALTIVEC_BUILTIN_STVXL_V8HI);
17458 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17459 ALTIVEC_BUILTIN_STVXL_V16QI);
17460 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17461 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17462 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17463 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17464 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17465 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17466 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17467 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17468 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17469 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17470 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17471 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17472 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17473 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17474 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17475 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17477 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17478 VSX_BUILTIN_LXVD2X_V2DF);
17479 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17480 VSX_BUILTIN_LXVD2X_V2DI);
17481 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17482 VSX_BUILTIN_LXVW4X_V4SF);
17483 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17484 VSX_BUILTIN_LXVW4X_V4SI);
17485 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17486 VSX_BUILTIN_LXVW4X_V8HI);
17487 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17488 VSX_BUILTIN_LXVW4X_V16QI);
17489 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17490 VSX_BUILTIN_STXVD2X_V2DF);
17491 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17492 VSX_BUILTIN_STXVD2X_V2DI);
17493 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17494 VSX_BUILTIN_STXVW4X_V4SF);
17495 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17496 VSX_BUILTIN_STXVW4X_V4SI);
17497 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17498 VSX_BUILTIN_STXVW4X_V8HI);
17499 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17500 VSX_BUILTIN_STXVW4X_V16QI);
17502 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17503 VSX_BUILTIN_LD_ELEMREV_V2DF);
17504 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17505 VSX_BUILTIN_LD_ELEMREV_V2DI);
17506 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17507 VSX_BUILTIN_LD_ELEMREV_V4SF);
17508 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17509 VSX_BUILTIN_LD_ELEMREV_V4SI);
17510 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17511 VSX_BUILTIN_ST_ELEMREV_V2DF);
17512 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17513 VSX_BUILTIN_ST_ELEMREV_V2DI);
17514 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17515 VSX_BUILTIN_ST_ELEMREV_V4SF);
17516 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17517 VSX_BUILTIN_ST_ELEMREV_V4SI);
17519 if (TARGET_P9_VECTOR)
17521 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17522 VSX_BUILTIN_LD_ELEMREV_V8HI);
17523 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17524 VSX_BUILTIN_LD_ELEMREV_V16QI);
17525 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17526 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17527 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17528 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17531 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17532 VSX_BUILTIN_VEC_LD);
17533 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17534 VSX_BUILTIN_VEC_ST);
17535 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17536 VSX_BUILTIN_VEC_XL);
17537 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17538 VSX_BUILTIN_VEC_XST);
17540 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17541 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17542 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17544 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17545 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17546 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17547 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17548 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17549 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17550 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17551 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17552 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17553 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17554 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17555 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17557 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17558 ALTIVEC_BUILTIN_VEC_ADDE);
17559 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17560 ALTIVEC_BUILTIN_VEC_ADDEC);
17561 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17562 ALTIVEC_BUILTIN_VEC_CMPNE);
17563 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17564 ALTIVEC_BUILTIN_VEC_MUL);
17566 /* Cell builtins. */
17567 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17568 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17569 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17570 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17572 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17573 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17574 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17575 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17577 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17578 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17579 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17580 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17582 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17583 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17584 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17585 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17587 if (TARGET_P9_VECTOR)
17588 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17589 P9V_BUILTIN_STXVL);
17591 /* Add the DST variants. */
17592 d = bdesc_dst;
17593 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17595 HOST_WIDE_INT mask = d->mask;
17597 if ((mask & builtin_mask) != mask)
17599 if (TARGET_DEBUG_BUILTIN)
17600 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17601 d->name);
17602 continue;
17604 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17607 /* Initialize the predicates. */
17608 d = bdesc_altivec_preds;
17609 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17611 machine_mode mode1;
17612 tree type;
17613 HOST_WIDE_INT mask = d->mask;
17615 if ((mask & builtin_mask) != mask)
17617 if (TARGET_DEBUG_BUILTIN)
17618 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17619 d->name);
17620 continue;
17623 if (rs6000_overloaded_builtin_p (d->code))
17624 mode1 = VOIDmode;
17625 else
17626 mode1 = insn_data[d->icode].operand[1].mode;
17628 switch (mode1)
17630 case VOIDmode:
17631 type = int_ftype_int_opaque_opaque;
17632 break;
17633 case V2DImode:
17634 type = int_ftype_int_v2di_v2di;
17635 break;
17636 case V4SImode:
17637 type = int_ftype_int_v4si_v4si;
17638 break;
17639 case V8HImode:
17640 type = int_ftype_int_v8hi_v8hi;
17641 break;
17642 case V16QImode:
17643 type = int_ftype_int_v16qi_v16qi;
17644 break;
17645 case V4SFmode:
17646 type = int_ftype_int_v4sf_v4sf;
17647 break;
17648 case V2DFmode:
17649 type = int_ftype_int_v2df_v2df;
17650 break;
17651 default:
17652 gcc_unreachable ();
17655 def_builtin (d->name, type, d->code);
17658 /* Initialize the abs* operators. */
17659 d = bdesc_abs;
17660 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17662 machine_mode mode0;
17663 tree type;
17664 HOST_WIDE_INT mask = d->mask;
17666 if ((mask & builtin_mask) != mask)
17668 if (TARGET_DEBUG_BUILTIN)
17669 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17670 d->name);
17671 continue;
17674 mode0 = insn_data[d->icode].operand[0].mode;
17676 switch (mode0)
17678 case V2DImode:
17679 type = v2di_ftype_v2di;
17680 break;
17681 case V4SImode:
17682 type = v4si_ftype_v4si;
17683 break;
17684 case V8HImode:
17685 type = v8hi_ftype_v8hi;
17686 break;
17687 case V16QImode:
17688 type = v16qi_ftype_v16qi;
17689 break;
17690 case V4SFmode:
17691 type = v4sf_ftype_v4sf;
17692 break;
17693 case V2DFmode:
17694 type = v2df_ftype_v2df;
17695 break;
17696 default:
17697 gcc_unreachable ();
17700 def_builtin (d->name, type, d->code);
17703 /* Initialize target builtin that implements
17704 targetm.vectorize.builtin_mask_for_load. */
17706 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17707 v16qi_ftype_long_pcvoid,
17708 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17709 BUILT_IN_MD, NULL, NULL_TREE);
17710 TREE_READONLY (decl) = 1;
17711 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17712 altivec_builtin_mask_for_load = decl;
17714 /* Access to the vec_init patterns. */
17715 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17716 integer_type_node, integer_type_node,
17717 integer_type_node, NULL_TREE);
17718 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17720 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17721 short_integer_type_node,
17722 short_integer_type_node,
17723 short_integer_type_node,
17724 short_integer_type_node,
17725 short_integer_type_node,
17726 short_integer_type_node,
17727 short_integer_type_node, NULL_TREE);
17728 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17730 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17731 char_type_node, char_type_node,
17732 char_type_node, char_type_node,
17733 char_type_node, char_type_node,
17734 char_type_node, char_type_node,
17735 char_type_node, char_type_node,
17736 char_type_node, char_type_node,
17737 char_type_node, char_type_node,
17738 char_type_node, NULL_TREE);
17739 def_builtin ("__builtin_vec_init_v16qi", ftype,
17740 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17742 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17743 float_type_node, float_type_node,
17744 float_type_node, NULL_TREE);
17745 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17747 /* VSX builtins. */
17748 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17749 double_type_node, NULL_TREE);
17750 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17752 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17753 intDI_type_node, NULL_TREE);
17754 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17756 /* Access to the vec_set patterns. */
17757 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17758 intSI_type_node,
17759 integer_type_node, NULL_TREE);
17760 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17762 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17763 intHI_type_node,
17764 integer_type_node, NULL_TREE);
17765 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17767 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17768 intQI_type_node,
17769 integer_type_node, NULL_TREE);
17770 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17772 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17773 float_type_node,
17774 integer_type_node, NULL_TREE);
17775 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17777 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17778 double_type_node,
17779 integer_type_node, NULL_TREE);
17780 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17782 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17783 intDI_type_node,
17784 integer_type_node, NULL_TREE);
17785 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17787 /* Access to the vec_extract patterns. */
17788 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17789 integer_type_node, NULL_TREE);
17790 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17792 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17793 integer_type_node, NULL_TREE);
17794 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17796 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17797 integer_type_node, NULL_TREE);
17798 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17800 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17801 integer_type_node, NULL_TREE);
17802 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17804 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17805 integer_type_node, NULL_TREE);
17806 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17808 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17809 integer_type_node, NULL_TREE);
17810 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17813 if (V1TI_type_node)
17815 tree v1ti_ftype_long_pcvoid
17816 = build_function_type_list (V1TI_type_node,
17817 long_integer_type_node, pcvoid_type_node,
17818 NULL_TREE);
17819 tree void_ftype_v1ti_long_pvoid
17820 = build_function_type_list (void_type_node,
17821 V1TI_type_node, long_integer_type_node,
17822 pvoid_type_node, NULL_TREE);
17823 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17824 VSX_BUILTIN_LXVD2X_V1TI);
17825 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17826 VSX_BUILTIN_STXVD2X_V1TI);
17827 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17828 NULL_TREE);
17829 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17830 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17831 intTI_type_node,
17832 integer_type_node, NULL_TREE);
17833 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17834 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17835 integer_type_node, NULL_TREE);
17836 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17841 static void
17842 htm_init_builtins (void)
17844 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17845 const struct builtin_description *d;
17846 size_t i;
17848 d = bdesc_htm;
17849 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17851 tree op[MAX_HTM_OPERANDS], type;
17852 HOST_WIDE_INT mask = d->mask;
17853 unsigned attr = rs6000_builtin_info[d->code].attr;
17854 bool void_func = (attr & RS6000_BTC_VOID);
17855 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17856 int nopnds = 0;
17857 tree gpr_type_node;
17858 tree rettype;
17859 tree argtype;
17861 if (TARGET_32BIT && TARGET_POWERPC64)
17862 gpr_type_node = long_long_unsigned_type_node;
17863 else
17864 gpr_type_node = long_unsigned_type_node;
17866 if (attr & RS6000_BTC_SPR)
17868 rettype = gpr_type_node;
17869 argtype = gpr_type_node;
17871 else if (d->code == HTM_BUILTIN_TABORTDC
17872 || d->code == HTM_BUILTIN_TABORTDCI)
17874 rettype = unsigned_type_node;
17875 argtype = gpr_type_node;
17877 else
17879 rettype = unsigned_type_node;
17880 argtype = unsigned_type_node;
17883 if ((mask & builtin_mask) != mask)
17885 if (TARGET_DEBUG_BUILTIN)
17886 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
17887 continue;
17890 if (d->name == 0)
17892 if (TARGET_DEBUG_BUILTIN)
17893 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17894 (long unsigned) i);
17895 continue;
17898 op[nopnds++] = (void_func) ? void_type_node : rettype;
17900 if (attr_args == RS6000_BTC_UNARY)
17901 op[nopnds++] = argtype;
17902 else if (attr_args == RS6000_BTC_BINARY)
17904 op[nopnds++] = argtype;
17905 op[nopnds++] = argtype;
17907 else if (attr_args == RS6000_BTC_TERNARY)
17909 op[nopnds++] = argtype;
17910 op[nopnds++] = argtype;
17911 op[nopnds++] = argtype;
17914 switch (nopnds)
17916 case 1:
17917 type = build_function_type_list (op[0], NULL_TREE);
17918 break;
17919 case 2:
17920 type = build_function_type_list (op[0], op[1], NULL_TREE);
17921 break;
17922 case 3:
17923 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17924 break;
17925 case 4:
17926 type = build_function_type_list (op[0], op[1], op[2], op[3],
17927 NULL_TREE);
17928 break;
17929 default:
17930 gcc_unreachable ();
17933 def_builtin (d->name, type, d->code);
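/* For illustration (editor's note): given the attribute bits above, a
   plain binary HTM builtin gets the signature
   "unsigned int f (unsigned int, unsigned int)" (with a void return
   if RS6000_BTC_VOID is set), while an SPR accessor (RS6000_BTC_SPR)
   uses the GPR width instead: unsigned long on a 64-bit target, or
   unsigned long long for -m32 -mpowerpc64.  */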
17937 /* Hash function for builtin functions with up to 3 arguments and a return
17938 type. */
17939 hashval_t
17940 builtin_hasher::hash (builtin_hash_struct *bh)
17942 unsigned ret = 0;
17943 int i;
17945 for (i = 0; i < 4; i++)
17947 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17948 ret = (ret * 2) + bh->uns_p[i];
17951 return ret;
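/* For illustration (editor's note): the loop above interleaves all
   four modes and their signedness flags into one value, so two
   signatures that differ only in the signedness of a single argument
   still hash differently and occupy distinct slots in
   builtin_hash_table.  */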
17954 /* Compare builtin hash entries H1 and H2 for equivalence. */
17955 bool
17956 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17958 return ((p1->mode[0] == p2->mode[0])
17959 && (p1->mode[1] == p2->mode[1])
17960 && (p1->mode[2] == p2->mode[2])
17961 && (p1->mode[3] == p2->mode[3])
17962 && (p1->uns_p[0] == p2->uns_p[0])
17963 && (p1->uns_p[1] == p2->uns_p[1])
17964 && (p1->uns_p[2] == p2->uns_p[2])
17965 && (p1->uns_p[3] == p2->uns_p[3]));
17968 /* Map types for builtin functions with an explicit return type and up to 3
17969 arguments.  Functions with fewer than 3 arguments use VOIDmode as the
17970 mode of the unused argument slots. */
17971 static tree
17972 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17973 machine_mode mode_arg1, machine_mode mode_arg2,
17974 enum rs6000_builtins builtin, const char *name)
17976 struct builtin_hash_struct h;
17977 struct builtin_hash_struct *h2;
17978 int num_args = 3;
17979 int i;
17980 tree ret_type = NULL_TREE;
17981 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17983 /* Create builtin_hash_table. */
17984 if (builtin_hash_table == NULL)
17985 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17987 h.type = NULL_TREE;
17988 h.mode[0] = mode_ret;
17989 h.mode[1] = mode_arg0;
17990 h.mode[2] = mode_arg1;
17991 h.mode[3] = mode_arg2;
17992 h.uns_p[0] = 0;
17993 h.uns_p[1] = 0;
17994 h.uns_p[2] = 0;
17995 h.uns_p[3] = 0;
17997 /* If the builtin produces unsigned results or takes unsigned arguments,
17998 and it may be returned as a decl for the vectorizer (such as the
17999 widening multiplies or permute), make sure the arguments and return
18000 value have the correct signedness. */
18001 switch (builtin)
18003 /* unsigned 1 argument functions. */
18004 case CRYPTO_BUILTIN_VSBOX:
18005 case P8V_BUILTIN_VGBBD:
18006 case MISC_BUILTIN_CDTBCD:
18007 case MISC_BUILTIN_CBCDTD:
18008 h.uns_p[0] = 1;
18009 h.uns_p[1] = 1;
18010 break;
18012 /* unsigned 2 argument functions. */
18013 case ALTIVEC_BUILTIN_VMULEUB_UNS:
18014 case ALTIVEC_BUILTIN_VMULEUH_UNS:
18015 case ALTIVEC_BUILTIN_VMULOUB_UNS:
18016 case ALTIVEC_BUILTIN_VMULOUH_UNS:
18017 case CRYPTO_BUILTIN_VCIPHER:
18018 case CRYPTO_BUILTIN_VCIPHERLAST:
18019 case CRYPTO_BUILTIN_VNCIPHER:
18020 case CRYPTO_BUILTIN_VNCIPHERLAST:
18021 case CRYPTO_BUILTIN_VPMSUMB:
18022 case CRYPTO_BUILTIN_VPMSUMH:
18023 case CRYPTO_BUILTIN_VPMSUMW:
18024 case CRYPTO_BUILTIN_VPMSUMD:
18025 case CRYPTO_BUILTIN_VPMSUM:
18026 case MISC_BUILTIN_ADDG6S:
18027 case MISC_BUILTIN_DIVWEU:
18028 case MISC_BUILTIN_DIVWEUO:
18029 case MISC_BUILTIN_DIVDEU:
18030 case MISC_BUILTIN_DIVDEUO:
18031 h.uns_p[0] = 1;
18032 h.uns_p[1] = 1;
18033 h.uns_p[2] = 1;
18034 break;
18036 /* unsigned 3 argument functions. */
18037 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18038 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18039 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18040 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18041 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18042 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18043 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18044 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18045 case VSX_BUILTIN_VPERM_16QI_UNS:
18046 case VSX_BUILTIN_VPERM_8HI_UNS:
18047 case VSX_BUILTIN_VPERM_4SI_UNS:
18048 case VSX_BUILTIN_VPERM_2DI_UNS:
18049 case VSX_BUILTIN_XXSEL_16QI_UNS:
18050 case VSX_BUILTIN_XXSEL_8HI_UNS:
18051 case VSX_BUILTIN_XXSEL_4SI_UNS:
18052 case VSX_BUILTIN_XXSEL_2DI_UNS:
18053 case CRYPTO_BUILTIN_VPERMXOR:
18054 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18055 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18056 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18057 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18058 case CRYPTO_BUILTIN_VSHASIGMAW:
18059 case CRYPTO_BUILTIN_VSHASIGMAD:
18060 case CRYPTO_BUILTIN_VSHASIGMA:
18061 h.uns_p[0] = 1;
18062 h.uns_p[1] = 1;
18063 h.uns_p[2] = 1;
18064 h.uns_p[3] = 1;
18065 break;
18067 /* signed permute functions with unsigned char mask. */
18068 case ALTIVEC_BUILTIN_VPERM_16QI:
18069 case ALTIVEC_BUILTIN_VPERM_8HI:
18070 case ALTIVEC_BUILTIN_VPERM_4SI:
18071 case ALTIVEC_BUILTIN_VPERM_4SF:
18072 case ALTIVEC_BUILTIN_VPERM_2DI:
18073 case ALTIVEC_BUILTIN_VPERM_2DF:
18074 case VSX_BUILTIN_VPERM_16QI:
18075 case VSX_BUILTIN_VPERM_8HI:
18076 case VSX_BUILTIN_VPERM_4SI:
18077 case VSX_BUILTIN_VPERM_4SF:
18078 case VSX_BUILTIN_VPERM_2DI:
18079 case VSX_BUILTIN_VPERM_2DF:
18080 h.uns_p[3] = 1;
18081 break;
18083 /* unsigned args, signed return. */
18084 case VSX_BUILTIN_XVCVUXDDP_UNS:
18085 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18086 h.uns_p[1] = 1;
18087 break;
18089 /* signed args, unsigned return. */
18090 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18091 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18092 case MISC_BUILTIN_UNPACK_TD:
18093 case MISC_BUILTIN_UNPACK_V1TI:
18094 h.uns_p[0] = 1;
18095 break;
18097 /* unsigned arguments for 128-bit pack instructions. */
18098 case MISC_BUILTIN_PACK_TD:
18099 case MISC_BUILTIN_PACK_V1TI:
18100 h.uns_p[1] = 1;
18101 h.uns_p[2] = 1;
18102 break;
18104 default:
18105 break;
18108 /* Figure out how many args are present. */
18109 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18110 num_args--;
18112 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18113 if (!ret_type && h.uns_p[0])
18114 ret_type = builtin_mode_to_type[h.mode[0]][0];
18116 if (!ret_type)
18117 fatal_error (input_location,
18118 "internal error: builtin function %s had an unexpected "
18119 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18121 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18122 arg_type[i] = NULL_TREE;
18124 for (i = 0; i < num_args; i++)
18126 int m = (int) h.mode[i+1];
18127 int uns_p = h.uns_p[i+1];
18129 arg_type[i] = builtin_mode_to_type[m][uns_p];
18130 if (!arg_type[i] && uns_p)
18131 arg_type[i] = builtin_mode_to_type[m][0];
18133 if (!arg_type[i])
18134 fatal_error (input_location,
18135 "internal error: builtin function %s, argument %d "
18136 "had unexpected argument type %s", name, i,
18137 GET_MODE_NAME (m));
18140 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18141 if (*found == NULL)
18143 h2 = ggc_alloc<builtin_hash_struct> ();
18144 *h2 = h;
18145 *found = h2;
18147 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18148 arg_type[2], NULL_TREE);
18151 return (*found)->type;
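/* Worked example (editor's illustration): for
   ALTIVEC_BUILTIN_VMULEUB_UNS the insn modes are V8HI (result) and
   V16QI/V16QI (arguments); the switch above marks all three slots
   unsigned, so the cached type is roughly
   "vector unsigned short (vector unsigned char, vector unsigned char)".  */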
18154 static void
18155 rs6000_common_init_builtins (void)
18157 const struct builtin_description *d;
18158 size_t i;
18160 tree opaque_ftype_opaque = NULL_TREE;
18161 tree opaque_ftype_opaque_opaque = NULL_TREE;
18162 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18163 tree v2si_ftype = NULL_TREE;
18164 tree v2si_ftype_qi = NULL_TREE;
18165 tree v2si_ftype_v2si_qi = NULL_TREE;
18166 tree v2si_ftype_int_qi = NULL_TREE;
18167 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18169 if (!TARGET_PAIRED_FLOAT)
18171 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18172 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18175 /* Paired and SPE builtins are only available if you build a compiler with
18176 the appropriate options, so only create those builtins with the
18177 appropriate compiler option. Create Altivec and VSX builtins on machines
18178 with at least the general purpose extensions (970 and newer) to allow the
18179 use of the target attribute.  */
18181 if (TARGET_EXTRA_BUILTINS)
18182 builtin_mask |= RS6000_BTM_COMMON;
18184 /* Add the ternary operators. */
18185 d = bdesc_3arg;
18186 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18188 tree type;
18189 HOST_WIDE_INT mask = d->mask;
18191 if ((mask & builtin_mask) != mask)
18193 if (TARGET_DEBUG_BUILTIN)
18194 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18195 continue;
18198 if (rs6000_overloaded_builtin_p (d->code))
18200 if (! (type = opaque_ftype_opaque_opaque_opaque))
18201 type = opaque_ftype_opaque_opaque_opaque
18202 = build_function_type_list (opaque_V4SI_type_node,
18203 opaque_V4SI_type_node,
18204 opaque_V4SI_type_node,
18205 opaque_V4SI_type_node,
18206 NULL_TREE);
18208 else
18210 enum insn_code icode = d->icode;
18211 if (d->name == 0)
18213 if (TARGET_DEBUG_BUILTIN)
18214 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18215 (long unsigned)i);
18217 continue;
18220 if (icode == CODE_FOR_nothing)
18222 if (TARGET_DEBUG_BUILTIN)
18223 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18224 d->name);
18226 continue;
18229 type = builtin_function_type (insn_data[icode].operand[0].mode,
18230 insn_data[icode].operand[1].mode,
18231 insn_data[icode].operand[2].mode,
18232 insn_data[icode].operand[3].mode,
18233 d->code, d->name);
18236 def_builtin (d->name, type, d->code);
18239 /* Add the binary operators. */
18240 d = bdesc_2arg;
18241 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18243 machine_mode mode0, mode1, mode2;
18244 tree type;
18245 HOST_WIDE_INT mask = d->mask;
18247 if ((mask & builtin_mask) != mask)
18249 if (TARGET_DEBUG_BUILTIN)
18250 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18251 continue;
18254 if (rs6000_overloaded_builtin_p (d->code))
18256 if (! (type = opaque_ftype_opaque_opaque))
18257 type = opaque_ftype_opaque_opaque
18258 = build_function_type_list (opaque_V4SI_type_node,
18259 opaque_V4SI_type_node,
18260 opaque_V4SI_type_node,
18261 NULL_TREE);
18263 else
18265 enum insn_code icode = d->icode;
18266 if (d->name == 0)
18268 if (TARGET_DEBUG_BUILTIN)
18269 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18270 (long unsigned)i);
18272 continue;
18275 if (icode == CODE_FOR_nothing)
18277 if (TARGET_DEBUG_BUILTIN)
18278 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18279 d->name);
18281 continue;
18284 mode0 = insn_data[icode].operand[0].mode;
18285 mode1 = insn_data[icode].operand[1].mode;
18286 mode2 = insn_data[icode].operand[2].mode;
18288 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18290 if (! (type = v2si_ftype_v2si_qi))
18291 type = v2si_ftype_v2si_qi
18292 = build_function_type_list (opaque_V2SI_type_node,
18293 opaque_V2SI_type_node,
18294 char_type_node,
18295 NULL_TREE);
18298 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18299 && mode2 == QImode)
18301 if (! (type = v2si_ftype_int_qi))
18302 type = v2si_ftype_int_qi
18303 = build_function_type_list (opaque_V2SI_type_node,
18304 integer_type_node,
18305 char_type_node,
18306 NULL_TREE);
18309 else
18310 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18311 d->code, d->name);
18314 def_builtin (d->name, type, d->code);
18317 /* Add the simple unary operators. */
18318 d = bdesc_1arg;
18319 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18321 machine_mode mode0, mode1;
18322 tree type;
18323 HOST_WIDE_INT mask = d->mask;
18325 if ((mask & builtin_mask) != mask)
18327 if (TARGET_DEBUG_BUILTIN)
18328 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18329 continue;
18332 if (rs6000_overloaded_builtin_p (d->code))
18334 if (! (type = opaque_ftype_opaque))
18335 type = opaque_ftype_opaque
18336 = build_function_type_list (opaque_V4SI_type_node,
18337 opaque_V4SI_type_node,
18338 NULL_TREE);
18340 else
18342 enum insn_code icode = d->icode;
18343 if (d->name == 0)
18345 if (TARGET_DEBUG_BUILTIN)
18346 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18347 (long unsigned)i);
18349 continue;
18352 if (icode == CODE_FOR_nothing)
18354 if (TARGET_DEBUG_BUILTIN)
18355 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18356 d->name);
18358 continue;
18361 mode0 = insn_data[icode].operand[0].mode;
18362 mode1 = insn_data[icode].operand[1].mode;
18364 if (mode0 == V2SImode && mode1 == QImode)
18366 if (! (type = v2si_ftype_qi))
18367 type = v2si_ftype_qi
18368 = build_function_type_list (opaque_V2SI_type_node,
18369 char_type_node,
18370 NULL_TREE);
18373 else
18374 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18375 d->code, d->name);
18378 def_builtin (d->name, type, d->code);
18381 /* Add the simple no-argument operators. */
18382 d = bdesc_0arg;
18383 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18385 machine_mode mode0;
18386 tree type;
18387 HOST_WIDE_INT mask = d->mask;
18389 if ((mask & builtin_mask) != mask)
18391 if (TARGET_DEBUG_BUILTIN)
18392 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18393 continue;
18395 if (rs6000_overloaded_builtin_p (d->code))
18397 if (!opaque_ftype_opaque)
18398 opaque_ftype_opaque
18399 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18400 type = opaque_ftype_opaque;
18402 else
18404 enum insn_code icode = d->icode;
18405 if (d->name == 0)
18407 if (TARGET_DEBUG_BUILTIN)
18408 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18409 (long unsigned) i);
18410 continue;
18412 if (icode == CODE_FOR_nothing)
18414 if (TARGET_DEBUG_BUILTIN)
18415 fprintf (stderr,
18416 "rs6000_builtin, skip no-argument %s (no code)\n",
18417 d->name);
18418 continue;
18420 mode0 = insn_data[icode].operand[0].mode;
18421 if (mode0 == V2SImode)
18423 /* code for SPE */
18424 if (! (type = v2si_ftype))
18426 v2si_ftype
18427 = build_function_type_list (opaque_V2SI_type_node,
18428 NULL_TREE);
18429 type = v2si_ftype;
18432 else
18433 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18434 d->code, d->name);
18436 def_builtin (d->name, type, d->code);
18440 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18441 static void
18442 init_float128_ibm (machine_mode mode)
18444 if (!TARGET_XL_COMPAT)
18446 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18447 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18448 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18449 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18451 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18453 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18454 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18455 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18456 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18457 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18458 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18459 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18461 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18462 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18463 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18464 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18465 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18466 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18467 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18468 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18471 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18472 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18474 else
18476 set_optab_libfunc (add_optab, mode, "_xlqadd");
18477 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18478 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18479 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18482 /* Add various conversions for IFmode to use the traditional TFmode
18483 names. */
18484 if (mode == IFmode)
18486 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18487 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18488 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18489 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18490 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18491 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18493 if (TARGET_POWERPC64)
18495 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18496 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18497 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18498 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18503 /* Set up IEEE 128-bit floating point routines. Use different names if the
18504 arguments can be passed in a vector register. The historical PowerPC
18505 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18506 continue to use that if we aren't using vector registers to pass IEEE
18507 128-bit floating point. */
18509 static void
18510 init_float128_ieee (machine_mode mode)
18512 if (FLOAT128_VECTOR_P (mode))
18514 set_optab_libfunc (add_optab, mode, "__addkf3");
18515 set_optab_libfunc (sub_optab, mode, "__subkf3");
18516 set_optab_libfunc (neg_optab, mode, "__negkf2");
18517 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18518 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18519 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18520 set_optab_libfunc (abs_optab, mode, "__abskf2");
18522 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18523 set_optab_libfunc (ne_optab, mode, "__nekf2");
18524 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18525 set_optab_libfunc (ge_optab, mode, "__gekf2");
18526 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18527 set_optab_libfunc (le_optab, mode, "__lekf2");
18528 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18530 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18531 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18532 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18533 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18535 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18536 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18537 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18539 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18540 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18541 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18543 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18544 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18545 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18546 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18547 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18548 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18550 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18551 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18552 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18553 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18555 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18556 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18557 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18558 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18560 if (TARGET_POWERPC64)
18562 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18563 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18564 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18565 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18569 else
18571 set_optab_libfunc (add_optab, mode, "_q_add");
18572 set_optab_libfunc (sub_optab, mode, "_q_sub");
18573 set_optab_libfunc (neg_optab, mode, "_q_neg");
18574 set_optab_libfunc (smul_optab, mode, "_q_mul");
18575 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18576 if (TARGET_PPC_GPOPT)
18577 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18579 set_optab_libfunc (eq_optab, mode, "_q_feq");
18580 set_optab_libfunc (ne_optab, mode, "_q_fne");
18581 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18582 set_optab_libfunc (ge_optab, mode, "_q_fge");
18583 set_optab_libfunc (lt_optab, mode, "_q_flt");
18584 set_optab_libfunc (le_optab, mode, "_q_fle");
18586 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18587 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18588 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18589 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18590 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18591 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18592 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18593 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18597 static void
18598 rs6000_init_libfuncs (void)
18600 /* __float128 support. */
18601 if (TARGET_FLOAT128_TYPE)
18603 init_float128_ibm (IFmode);
18604 init_float128_ieee (KFmode);
18607 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18608 if (TARGET_LONG_DOUBLE_128)
18610 if (!TARGET_IEEEQUAD)
18611 init_float128_ibm (TFmode);
18613 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18614 else
18615 init_float128_ieee (TFmode);
18620 /* Expand a block clear operation, and return 1 if successful. Return 0
18621 if we should let the compiler generate normal code.
18623 operands[0] is the destination
18624 operands[1] is the length
18625 operands[3] is the alignment */
18627 int
18628 expand_block_clear (rtx operands[])
18630 rtx orig_dest = operands[0];
18631 rtx bytes_rtx = operands[1];
18632 rtx align_rtx = operands[3];
18633 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18634 HOST_WIDE_INT align;
18635 HOST_WIDE_INT bytes;
18636 int offset;
18637 int clear_bytes;
18638 int clear_step;
18640 /* If this is not a fixed size clear, just call memset.  */
18641 if (! constp)
18642 return 0;
18644 /* This must be a fixed size alignment */
18645 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18646 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18648 /* Anything to clear? */
18649 bytes = INTVAL (bytes_rtx);
18650 if (bytes <= 0)
18651 return 1;
18653 /* Use the builtin memset after a point, to avoid huge code bloat.
18654 When optimize_size, avoid any significant code bloat; calling
18655 memset is about 4 instructions, so allow for one instruction to
18656 load zero and three to do clearing. */
18657 if (TARGET_ALTIVEC && align >= 128)
18658 clear_step = 16;
18659 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18660 clear_step = 8;
18661 else if (TARGET_SPE && align >= 64)
18662 clear_step = 8;
18663 else
18664 clear_step = 4;
18666 if (optimize_size && bytes > 3 * clear_step)
18667 return 0;
18668 if (! optimize_size && bytes > 8 * clear_step)
18669 return 0;
18671 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18673 machine_mode mode = BLKmode;
18674 rtx dest;
18676 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18678 clear_bytes = 16;
18679 mode = V4SImode;
18681 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18683 clear_bytes = 8;
18684 mode = V2SImode;
18686 else if (bytes >= 8 && TARGET_POWERPC64
18687 && (align >= 64 || !STRICT_ALIGNMENT))
18689 clear_bytes = 8;
18690 mode = DImode;
18691 if (offset == 0 && align < 64)
18693 rtx addr;
18695 /* If the address form is reg+offset with offset not a
18696 multiple of four, reload into reg indirect form here
18697 rather than waiting for reload. This way we get one
18698 reload, not one per store. */
18699 addr = XEXP (orig_dest, 0);
18700 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18701 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18702 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18704 addr = copy_addr_to_reg (addr);
18705 orig_dest = replace_equiv_address (orig_dest, addr);
18709 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18710 { /* move 4 bytes */
18711 clear_bytes = 4;
18712 mode = SImode;
18714 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18715 { /* move 2 bytes */
18716 clear_bytes = 2;
18717 mode = HImode;
18719 else /* move 1 byte at a time */
18721 clear_bytes = 1;
18722 mode = QImode;
18725 dest = adjust_address (orig_dest, mode, offset);
18727 emit_move_insn (dest, CONST0_RTX (mode));
18730 return 1;
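/* Worked example (editor's illustration): clearing 10 bytes with
   64-bit alignment on a 64-bit target uses clear_step = 8, so the
   loop above emits one DImode zero store followed by one HImode zero
   store; a 16-byte, 128-bit-aligned clear with Altivec is a single
   V4SImode store.  */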
18733 /* Emit a potentially record-form instruction, setting DST from SRC.
18734 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18735 signed comparison of DST with zero. If DOT is 1, the generated RTL
18736 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18737 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18738 a separate COMPARE. */
18740 static void
18741 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18743 if (dot == 0)
18745 emit_move_insn (dst, src);
18746 return;
18749 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18751 emit_move_insn (dst, src);
18752 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18753 return;
18756 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18757 if (dot == 1)
18759 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18760 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18762 else
18764 rtx set = gen_rtx_SET (dst, src);
18765 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
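/* For illustration (editor's note): with DOT == 1 and CCREG == cr0,
   setting r3 from (plus r4 r5) emits the single PARALLEL
     [(set cr0 (compare (plus r4 r5) 0)) (clobber r3)]
   which the add pattern can match as the record-form "add. 3,4,5".  */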
18770 /* Figure out the correct instructions to generate to load data for
18771 block compare. MODE is used for the read from memory, and
18772 data is zero extended if REG is wider than MODE. If LE code
18773 is being generated, bswap loads are used.
18775 REG is the destination register to move the data into.
18776 MEM is the memory block being read.
18777 MODE is the mode of memory to use for the read. */
18778 static void
18779 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
18781 switch (GET_MODE (reg))
18783 case DImode:
18784 switch (mode)
18786 case QImode:
18787 emit_insn (gen_zero_extendqidi2 (reg, mem));
18788 break;
18789 case HImode:
18791 rtx src = mem;
18792 if (!BYTES_BIG_ENDIAN)
18794 src = gen_reg_rtx (HImode);
18795 emit_insn (gen_bswaphi2 (src, mem));
18797 emit_insn (gen_zero_extendhidi2 (reg, src));
18798 break;
18800 case SImode:
18802 rtx src = mem;
18803 if (!BYTES_BIG_ENDIAN)
18805 src = gen_reg_rtx (SImode);
18806 emit_insn (gen_bswapsi2 (src, mem));
18808 emit_insn (gen_zero_extendsidi2 (reg, src));
18810 break;
18811 case DImode:
18812 if (!BYTES_BIG_ENDIAN)
18813 emit_insn (gen_bswapdi2 (reg, mem));
18814 else
18815 emit_insn (gen_movdi (reg, mem));
18816 break;
18817 default:
18818 gcc_unreachable ();
18820 break;
18822 case SImode:
18823 switch (mode)
18825 case QImode:
18826 emit_insn (gen_zero_extendqisi2 (reg, mem));
18827 break;
18828 case HImode:
18830 rtx src = mem;
18831 if (!BYTES_BIG_ENDIAN)
18833 src = gen_reg_rtx (HImode);
18834 emit_insn (gen_bswaphi2 (src, mem));
18836 emit_insn (gen_zero_extendhisi2 (reg, src));
18837 break;
18839 case SImode:
18840 if (!BYTES_BIG_ENDIAN)
18841 emit_insn (gen_bswapsi2 (reg, mem));
18842 else
18843 emit_insn (gen_movsi (reg, mem));
18844 break;
18845 case DImode:
18846 /* DImode is larger than the destination reg so is not expected. */
18847 gcc_unreachable ();
18848 break;
18849 default:
18850 gcc_unreachable ();
18852 break;
18853 default:
18854 gcc_unreachable ();
18855 break;
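/* For illustration (editor's note): on little-endian with REG in
   DImode and MODE == SImode, the code above loads through a
   byte-reversing load (gen_bswapsi2 on a MEM, typically an lwbrx)
   into a fresh SImode temporary and then zero-extends that into
   REG.  */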
18859 /* Select the mode to be used for reading the next chunk of bytes
18860 in the compare.
18862 OFFSET is the current read offset from the beginning of the block.
18863 BYTES is the number of bytes remaining to be read.
18864 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
18865 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
18866 the largest allowable mode. */
18867 static machine_mode
18868 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
18869 HOST_WIDE_INT align, bool word_mode_ok)
18871 /* First see if we can do a whole load unit
18872 as that will be more efficient than a larger load + shift. */
18874 /* If big, use biggest chunk.
18875 If exactly chunk size, use that size.
18876 If remainder can be done in one piece with shifting, do that.
18877 Do largest chunk possible without violating alignment rules. */
18879 /* The most we can read without potential page crossing. */
18880 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
18882 if (word_mode_ok && bytes >= UNITS_PER_WORD)
18883 return word_mode;
18884 else if (bytes == GET_MODE_SIZE (SImode))
18885 return SImode;
18886 else if (bytes == GET_MODE_SIZE (HImode))
18887 return HImode;
18888 else if (bytes == GET_MODE_SIZE (QImode))
18889 return QImode;
18890 else if (bytes < GET_MODE_SIZE (SImode)
18891 && offset >= GET_MODE_SIZE (SImode) - bytes)
18892 /* This matches the case where we have SImode and 3 bytes
18893 and offset >= 1 and permits us to move back one and overlap
18894 with the previous read, thus avoiding having to shift
18895 unwanted bytes off of the input. */
18896 return SImode;
18897 else if (word_mode_ok && bytes < UNITS_PER_WORD
18898 && offset >= UNITS_PER_WORD-bytes)
18899 /* Similarly, if we can use DImode it will get matched here and
18900 can do an overlapping read that ends at the end of the block. */
18901 return word_mode;
18902 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
18903 /* It is safe to do all remaining in one load of largest size,
18904 possibly with a shift to get rid of unwanted bytes. */
18905 return word_mode;
18906 else if (maxread >= GET_MODE_SIZE (SImode))
18907 /* It is safe to do all remaining in one SImode load,
18908 possibly with a shift to get rid of unwanted bytes. */
18909 return SImode;
18910 else if (bytes > GET_MODE_SIZE (SImode))
18911 return SImode;
18912 else if (bytes > GET_MODE_SIZE (HImode))
18913 return HImode;
18915 /* Final fallback is to do one byte at a time.  */
18916 return QImode;
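/* Worked example (editor's illustration): with 8-byte words and 5
   bytes remaining at offset 8, the test "bytes < UNITS_PER_WORD &&
   offset >= UNITS_PER_WORD - bytes" holds, so word_mode is chosen and
   the read overlaps the previous chunk, ending exactly at the end of
   the block with no shift needed.  */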
18919 /* Compute the alignment of pointer+OFFSET where the original alignment
18920 of pointer was BASE_ALIGN. */
18921 static HOST_WIDE_INT
18922 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
18924 if (offset == 0)
18925 return base_align;
18926 return min (base_align, offset & -offset);
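/* For illustration (editor's note): "offset & -offset" isolates the
   lowest set bit of the offset, e.g. base_align 8 at offset 12 gives
   min (8, 4) = 4, since an 8-byte-aligned pointer plus 12 is only
   guaranteed 4-byte alignment.  */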
18929 /* Expand a block compare operation, and return true if successful.
18930 Return false if we should let the compiler generate normal code,
18931 probably a memcmp call.
18933 OPERANDS[0] is the target (result).
18934 OPERANDS[1] is the first source.
18935 OPERANDS[2] is the second source.
18936 OPERANDS[3] is the length.
18937 OPERANDS[4] is the alignment. */
18938 bool
18939 expand_block_compare (rtx operands[])
18941 rtx target = operands[0];
18942 rtx orig_src1 = operands[1];
18943 rtx orig_src2 = operands[2];
18944 rtx bytes_rtx = operands[3];
18945 rtx align_rtx = operands[4];
18946 HOST_WIDE_INT cmp_bytes = 0;
18947 rtx src1 = orig_src1;
18948 rtx src2 = orig_src2;
18950 /* If this is not a fixed size compare, just call memcmp */
18951 if (!CONST_INT_P (bytes_rtx))
18952 return false;
18954 /* This must be a fixed size alignment */
18955 if (!CONST_INT_P (align_rtx))
18956 return false;
18958 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
18960 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */
18961 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
18962 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
18963 return false;
18965 gcc_assert (GET_MODE (target) == SImode);
18967 /* Anything to compare?  */
18968 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
18969 if (bytes <= 0)
18970 return true;
18972 /* The code generated for p7 and older is not faster than glibc
18973 memcmp if alignment is small and length is not short, so bail
18974 out to avoid those conditions. */
18975 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
18976 && ((base_align == 1 && bytes > 16)
18977 || (base_align == 2 && bytes > 32)))
18978 return false;
18980 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
18981 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
18983 /* If we have an LE target without ldbrx and word_mode is DImode,
18984 then we must avoid using word_mode. */
18985 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
18986 && word_mode == DImode);
18988 /* Strategy phase. How many ops will this take and should we expand it? */
18990 int offset = 0;
18991 machine_mode load_mode =
18992 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
18993 int load_mode_size = GET_MODE_SIZE (load_mode);
18995 /* We don't want to generate too much code. */
18996 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
18997 > rs6000_block_compare_inline_limit)
18998 return false;
19000 bool generate_6432_conversion = false;
19001 rtx convert_label = NULL;
19002 rtx final_label = NULL;
19004 /* Example of generated code for an 11-byte compare with 1-byte alignment:
19005 .L10:
19006 ldbrx 10,6,9
19007 ldbrx 9,7,9
19008 subf. 9,9,10
19009 bne 0,.L8
19010 addi 9,4,7
19011 lwbrx 10,0,9
19012 addi 9,5,7
19013 lwbrx 9,0,9
19014 subf 9,9,10
19015 b .L9
19016 .L8: # convert_label
19017 cntlzd 9,9
19018 addi 9,9,-1
19019 xori 9,9,0x3f
19020 .L9: # final_label
19022 If we start off with DImode and then compare/branch to something with
19023 a smaller mode, we will need a block with the DI->SI conversion
19024 that may or may not be executed. */
19026 while (bytes > 0)
19028 int align = compute_current_alignment (base_align, offset);
19029 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19030 load_mode = select_block_compare_mode (offset, bytes, align,
19031 word_mode_ok);
19032 else
19033 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19034 load_mode_size = GET_MODE_SIZE (load_mode);
19035 if (bytes >= load_mode_size)
19036 cmp_bytes = load_mode_size;
19037 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19039 /* Move this load back so it doesn't go past the end.
19040 P8/P9 can do this efficiently. */
19041 int extra_bytes = load_mode_size - bytes;
19042 cmp_bytes = bytes;
19043 if (extra_bytes < offset)
19045 offset -= extra_bytes;
19046 cmp_bytes = load_mode_size;
19047 bytes = cmp_bytes;
19050 else
19051 /* P7 and earlier can't do the overlapping load trick fast,
19052 so this forces a non-overlapping load and a shift to get
19053 rid of the extra bytes. */
19054 cmp_bytes = bytes;
19056 src1 = adjust_address (orig_src1, load_mode, offset);
19057 src2 = adjust_address (orig_src2, load_mode, offset);
19059 if (!REG_P (XEXP (src1, 0)))
19061 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19062 src1 = replace_equiv_address (src1, src1_reg);
19064 set_mem_size (src1, cmp_bytes);
19066 if (!REG_P (XEXP (src2, 0)))
19068 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19069 src2 = replace_equiv_address (src2, src2_reg);
19071 set_mem_size (src2, cmp_bytes);
19073 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19074 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19076 if (cmp_bytes < load_mode_size)
19078 /* Shift unneeded bytes off. */
19079 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
19080 if (word_mode == DImode)
19082 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
19083 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
19085 else
19087 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19088 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
19092 /* We previously did a block that needed 64->32 conversion but
19093 the current block does not, so a label is needed to jump
19094 to the end. */
19095 if (generate_6432_conversion && !final_label
19096 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
19097 final_label = gen_label_rtx ();
19099 /* Do we need a 64->32 conversion block? */
19100 int remain = bytes - cmp_bytes;
19101 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
19103 generate_6432_conversion = true;
19104 if (remain > 0 && !convert_label)
19105 convert_label = gen_label_rtx ();
19108 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
19110 /* Target is larger than load size so we don't need to
19111 reduce result size. */
19112 if (remain > 0)
19114 /* This is not the last block, branch to the end if the result
19115 of this subtract is not zero. */
19116 if (!final_label)
19117 final_label = gen_label_rtx ();
19118 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19119 rtx cond = gen_reg_rtx (CCmode);
19120 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19121 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19122 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19123 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19124 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19125 fin_ref, pc_rtx);
19126 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19127 JUMP_LABEL (j) = final_label;
19128 LABEL_NUSES (final_label) += 1;
19130 else
19132 if (word_mode == DImode)
19134 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19135 tmp_reg_src2));
19136 emit_insn (gen_movsi (target,
19137 gen_lowpart (SImode, tmp_reg_src2)));
19139 else
19140 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
19142 if (final_label)
19144 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19145 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19146 JUMP_LABEL (j) = final_label;
19147 LABEL_NUSES (final_label) += 1;
19148 emit_barrier ();
19152 else
19154 generate_6432_conversion = true;
19155 if (remain > 0)
19157 if (!convert_label)
19158 convert_label = gen_label_rtx ();
19160 /* Compare to zero and branch to convert_label if not zero. */
19161 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
19162 rtx cond = gen_reg_rtx (CCmode);
19163 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
19164 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19165 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19166 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19167 cvt_ref, pc_rtx);
19168 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19169 JUMP_LABEL (j) = convert_label;
19170 LABEL_NUSES (convert_label) += 1;
19172 else
19174 /* Just do the subtract. Since this is the last block the
19175 convert code will be generated immediately following. */
19176 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19177 tmp_reg_src2));
19181 offset += cmp_bytes;
19182 bytes -= cmp_bytes;
19185 if (generate_6432_conversion)
19187 if (convert_label)
19188 emit_label (convert_label);
19190 /* We need to produce a DI result from the subtract, then convert it
19191 to the SI target while maintaining the <0 / ==0 / >0 properties.
19192 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
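/* Worked example (editor's illustration): a DImode difference of
   0x100000000 is positive but its low 32 bits are all zero; cntlzd
   gives 31, addi -1 gives 30, xori 63 gives 33, so the truncated
   SImode result is still positive.  A difference of zero maps to
   64 -> 63 -> 0, and a negative difference keeps bit 31 set.  */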
19193 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
19194 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
19195 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
19196 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19199 if (final_label)
19200 emit_label (final_label);
19202 gcc_assert (bytes == 0);
19203 return true;
19207 /* Expand a block move operation, and return 1 if successful. Return 0
19208 if we should let the compiler generate normal code.
19210 operands[0] is the destination
19211 operands[1] is the source
19212 operands[2] is the length
19213 operands[3] is the alignment */
19215 #define MAX_MOVE_REG 4
19217 int
19218 expand_block_move (rtx operands[])
19220 rtx orig_dest = operands[0];
19221 rtx orig_src = operands[1];
19222 rtx bytes_rtx = operands[2];
19223 rtx align_rtx = operands[3];
19224 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
19225 int align;
19226 int bytes;
19227 int offset;
19228 int move_bytes;
19229 rtx stores[MAX_MOVE_REG];
19230 int num_reg = 0;
19232 /* If this is not a fixed size move, just call memcpy */
19233 if (! constp)
19234 return 0;
19236 /* This must be a fixed size alignment */
19237 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19238 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19240 /* Anything to move? */
19241 bytes = INTVAL (bytes_rtx);
19242 if (bytes <= 0)
19243 return 1;
19245 if (bytes > rs6000_block_move_inline_limit)
19246 return 0;
19248 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
19250 union {
19251 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
19252 rtx (*mov) (rtx, rtx);
19253 } gen_func;
19254 machine_mode mode = BLKmode;
19255 rtx src, dest;
19257 /* Altivec first, since it will be faster than a string move
19258 when it applies, and usually not significantly larger. */
19259 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
19261 move_bytes = 16;
19262 mode = V4SImode;
19263 gen_func.mov = gen_movv4si;
19265 else if (TARGET_SPE && bytes >= 8 && align >= 64)
19267 move_bytes = 8;
19268 mode = V2SImode;
19269 gen_func.mov = gen_movv2si;
19271 else if (TARGET_STRING
19272 && bytes > 24 /* move up to 32 bytes at a time */
19273 && ! fixed_regs[5]
19274 && ! fixed_regs[6]
19275 && ! fixed_regs[7]
19276 && ! fixed_regs[8]
19277 && ! fixed_regs[9]
19278 && ! fixed_regs[10]
19279 && ! fixed_regs[11]
19280 && ! fixed_regs[12])
19282 move_bytes = (bytes > 32) ? 32 : bytes;
19283 gen_func.movmemsi = gen_movmemsi_8reg;
19285 else if (TARGET_STRING
19286 && bytes > 16 /* move up to 24 bytes at a time */
19287 && ! fixed_regs[5]
19288 && ! fixed_regs[6]
19289 && ! fixed_regs[7]
19290 && ! fixed_regs[8]
19291 && ! fixed_regs[9]
19292 && ! fixed_regs[10])
19294 move_bytes = (bytes > 24) ? 24 : bytes;
19295 gen_func.movmemsi = gen_movmemsi_6reg;
19297 else if (TARGET_STRING
19298 && bytes > 8 /* move up to 16 bytes at a time */
19299 && ! fixed_regs[5]
19300 && ! fixed_regs[6]
19301 && ! fixed_regs[7]
19302 && ! fixed_regs[8])
19304 move_bytes = (bytes > 16) ? 16 : bytes;
19305 gen_func.movmemsi = gen_movmemsi_4reg;
19307 else if (bytes >= 8 && TARGET_POWERPC64
19308 && (align >= 64 || !STRICT_ALIGNMENT))
19310 move_bytes = 8;
19311 mode = DImode;
19312 gen_func.mov = gen_movdi;
19313 if (offset == 0 && align < 64)
19315 rtx addr;
19317 /* If the address form is reg+offset with offset not a
19318 multiple of four, reload into reg indirect form here
19319 rather than waiting for reload. This way we get one
19320 reload, not one per load and/or store. */
19321 addr = XEXP (orig_dest, 0);
19322 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19323 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19324 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19326 addr = copy_addr_to_reg (addr);
19327 orig_dest = replace_equiv_address (orig_dest, addr);
19329 addr = XEXP (orig_src, 0);
19330 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19331 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19332 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19334 addr = copy_addr_to_reg (addr);
19335 orig_src = replace_equiv_address (orig_src, addr);
19339 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
19340 { /* move up to 8 bytes at a time */
19341 move_bytes = (bytes > 8) ? 8 : bytes;
19342 gen_func.movmemsi = gen_movmemsi_2reg;
19344 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19345 { /* move 4 bytes */
19346 move_bytes = 4;
19347 mode = SImode;
19348 gen_func.mov = gen_movsi;
19350 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19351 { /* move 2 bytes */
19352 move_bytes = 2;
19353 mode = HImode;
19354 gen_func.mov = gen_movhi;
19356 else if (TARGET_STRING && bytes > 1)
19357 { /* move up to 4 bytes at a time */
19358 move_bytes = (bytes > 4) ? 4 : bytes;
19359 gen_func.movmemsi = gen_movmemsi_1reg;
19361 else /* move 1 byte at a time */
19363 move_bytes = 1;
19364 mode = QImode;
19365 gen_func.mov = gen_movqi;
19368 src = adjust_address (orig_src, mode, offset);
19369 dest = adjust_address (orig_dest, mode, offset);
19371 if (mode != BLKmode)
19373 rtx tmp_reg = gen_reg_rtx (mode);
19375 emit_insn ((*gen_func.mov) (tmp_reg, src));
19376 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
19379 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
19381 int i;
19382 for (i = 0; i < num_reg; i++)
19383 emit_insn (stores[i]);
19384 num_reg = 0;
19387 if (mode == BLKmode)
19389 /* Move the address into scratch registers. The movmemsi
19390 patterns require zero offset. */
19391 if (!REG_P (XEXP (src, 0)))
19393 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
19394 src = replace_equiv_address (src, src_reg);
19396 set_mem_size (src, move_bytes);
19398 if (!REG_P (XEXP (dest, 0)))
19400 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
19401 dest = replace_equiv_address (dest, dest_reg);
19403 set_mem_size (dest, move_bytes);
19405 emit_insn ((*gen_func.movmemsi) (dest, src,
19406 GEN_INT (move_bytes & 31),
19407 align_rtx));
19411 return 1;
19415 /* Return a string to perform a load_multiple operation.
19416 operands[0] is the vector.
19417 operands[1] is the source address.
19418 operands[2] is the first destination register. */
19420 const char *
19421 rs6000_output_load_multiple (rtx operands[3])
19423 /* We have to handle the case where the pseudo used to contain the address
19424 is assigned to one of the output registers. */
19425 int i, j;
19426 int words = XVECLEN (operands[0], 0);
19427 rtx xop[10];
19429 if (XVECLEN (operands[0], 0) == 1)
19430 return "lwz %2,0(%1)";
19432 for (i = 0; i < words; i++)
19433 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
19435 if (i == words-1)
19437 xop[0] = GEN_INT (4 * (words-1));
19438 xop[1] = operands[1];
19439 xop[2] = operands[2];
19440 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
19441 return "";
19443 else if (i == 0)
19445 xop[0] = GEN_INT (4 * (words-1));
19446 xop[1] = operands[1];
19447 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
19448 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
19449 return "";
19451 else
19453 for (j = 0; j < words; j++)
19454 if (j != i)
19456 xop[0] = GEN_INT (j * 4);
19457 xop[1] = operands[1];
19458 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
19459 output_asm_insn ("lwz %2,%0(%1)", xop);
19461 xop[0] = GEN_INT (i * 4);
19462 xop[1] = operands[1];
19463 output_asm_insn ("lwz %1,%0(%1)", xop);
19464 return "";
19468 return "lswi %2,%1,%N0";
19472 /* A validation routine: say whether CODE, a condition code, and MODE
19473 match. The other alternatives either don't make sense or should
19474 never be generated. */
19476 void
19477 validate_condition_mode (enum rtx_code code, machine_mode mode)
19479 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
19480 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
19481 && GET_MODE_CLASS (mode) == MODE_CC);
19483 /* These don't make sense. */
19484 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
19485 || mode != CCUNSmode);
19487 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
19488 || mode == CCUNSmode);
19490 gcc_assert (mode == CCFPmode
19491 || (code != ORDERED && code != UNORDERED
19492 && code != UNEQ && code != LTGT
19493 && code != UNGT && code != UNLT
19494 && code != UNGE && code != UNLE));
19496 /* These should never be generated except for
19497 flag_finite_math_only. */
19498 gcc_assert (mode != CCFPmode
19499 || flag_finite_math_only
19500 || (code != LE && code != GE
19501 && code != UNEQ && code != LTGT
19502 && code != UNGT && code != UNLT));
19504 /* These are invalid; the information is not there. */
19505 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
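/* For example, the unsigned comparisons GTU/LTU/GEU/LEU are only valid
   with CCUNSmode, IEEE tests such as UNORDERED and LTGT only with
   CCFPmode, and CCEQmode carries nothing beyond EQ and NE.  */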
19509 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
19510 rldicl, rldicr, or rldic instruction in mode MODE.  If so, and E is
19511 non-null, store in E the bit offset (counted from the right) where
19512 the single stretch of 1 bits begins; similarly for B, the bit
19513 offset where it ends.  */
19515 bool
19516 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
19518 unsigned HOST_WIDE_INT val = INTVAL (mask);
19519 unsigned HOST_WIDE_INT bit;
19520 int nb, ne;
19521 int n = GET_MODE_PRECISION (mode);
19523 if (mode != DImode && mode != SImode)
19524 return false;
19526 if (INTVAL (mask) >= 0)
19528 bit = val & -val;
19529 ne = exact_log2 (bit);
19530 nb = exact_log2 (val + bit);
19532 else if (val + 1 == 0)
19534 nb = n;
19535 ne = 0;
19537 else if (val & 1)
19539 val = ~val;
19540 bit = val & -val;
19541 nb = exact_log2 (bit);
19542 ne = exact_log2 (val + bit);
19544 else
19546 bit = val & -val;
19547 ne = exact_log2 (bit);
19548 if (val + bit == 0)
19549 nb = n;
19550 else
19551 nb = 0;
19554 nb--;
19556 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
19557 return false;
19559 if (b)
19560 *b = nb;
19561 if (e)
19562 *e = ne;
19564 return true;
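/* Worked example (illustrative): for the SImode constant 0x00ff0000,
   val & -val isolates the lowest set bit (1 << 16), giving ne = 16, and
   val + bit collapses the stretch of ones into the single bit 1 << 24,
   giving nb = 24 - 1 = 23; we store *e = 16 and *b = 23.  The
   sign-extended constant 0xff0000ff takes the "val & 1" branch and
   yields *e = 24, *b = 7; ne > nb marks it as a wrap-around mask.  */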
19567 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
19568 or rldicr instruction, to implement an AND with it in mode MODE. */
19570 bool
19571 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
19573 int nb, ne;
19575 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19576 return false;
19578 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
19579 does not wrap. */
19580 if (mode == DImode)
19581 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
19583 /* For SImode, rlwinm can do everything. */
19584 if (mode == SImode)
19585 return (nb < 32 && ne < 32);
19587 return false;
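/* Illustrative examples: in DImode the mask 0x00000000ffffffff has
   ne = 0 and is one rldicl, 0xffff000000000000 has nb = 63 and is one
   rldicr, while 0x000000ffffffff00 (ne = 8, nb = 39) clears bits at
   both ends and crosses bit 31, so it is rejected here.  */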
19590 /* Return the instruction template for an AND with mask in mode MODE, with
19591 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19593 const char *
19594 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
19596 int nb, ne;
19598 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
19599 gcc_unreachable ();
19601 if (mode == DImode && ne == 0)
19603 operands[3] = GEN_INT (63 - nb);
19604 if (dot)
19605 return "rldicl. %0,%1,0,%3";
19606 return "rldicl %0,%1,0,%3";
19609 if (mode == DImode && nb == 63)
19611 operands[3] = GEN_INT (63 - ne);
19612 if (dot)
19613 return "rldicr. %0,%1,0,%3";
19614 return "rldicr %0,%1,0,%3";
19617 if (nb < 32 && ne < 32)
19619 operands[3] = GEN_INT (31 - nb);
19620 operands[4] = GEN_INT (31 - ne);
19621 if (dot)
19622 return "rlwinm. %0,%1,0,%3,%4";
19623 return "rlwinm %0,%1,0,%3,%4";
19626 gcc_unreachable ();
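/* Illustrative sketch: an AND of a DImode value with 0xffffffff has
   nb = 31 and ne = 0, so the first branch above emits
   "rldicl %0,%1,0,32", clearing the upper 32 bits.  */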
19629 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
19630 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
19631 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
19633 bool
19634 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
19636 int nb, ne;
19638 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19639 return false;
19641 int n = GET_MODE_PRECISION (mode);
19642 int sh = -1;
19644 if (CONST_INT_P (XEXP (shift, 1)))
19646 sh = INTVAL (XEXP (shift, 1));
19647 if (sh < 0 || sh >= n)
19648 return false;
19651 rtx_code code = GET_CODE (shift);
19653 /* Convert any shift by 0 to a rotate, to simplify below code. */
19654 if (sh == 0)
19655 code = ROTATE;
19657 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19658 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19659 code = ASHIFT;
19660 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19662 code = LSHIFTRT;
19663 sh = n - sh;
19666 /* DImode rotates need rld*. */
19667 if (mode == DImode && code == ROTATE)
19668 return (nb == 63 || ne == 0 || ne == sh);
19670 /* SImode rotates need rlw*. */
19671 if (mode == SImode && code == ROTATE)
19672 return (nb < 32 && ne < 32 && sh < 32);
19674 /* Wrap-around masks are only okay for rotates. */
19675 if (ne > nb)
19676 return false;
19678 /* Variable shifts are only okay for rotates. */
19679 if (sh < 0)
19680 return false;
19682 /* Don't allow ASHIFT if the mask is wrong for that. */
19683 if (code == ASHIFT && ne < sh)
19684 return false;
19686 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
19687 if the mask is wrong for that. */
19688 if (nb < 32 && ne < 32 && sh < 32
19689 && !(code == LSHIFTRT && nb >= 32 - sh))
19690 return true;
19692 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
19693 if the mask is wrong for that. */
19694 if (code == LSHIFTRT)
19695 sh = 64 - sh;
19696 if (nb == 63 || ne == 0 || ne == sh)
19697 return !(code == LSHIFTRT && nb >= sh);
19699 return false;
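/* Illustrative example: (ashift:SI x 8) under the mask 0xffffff00 has
   nb = 31, ne = 8 and sh = 8; since ne >= sh the ASHIFT check passes
   and the rlw* test accepts it.  The same mask with a shift of 16 fails
   the "ne < sh" check, because an ashift by 16 can never leave bits
   8..15 set.  */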
19702 /* Return the instruction template for a shift with mask in mode MODE, with
19703 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19705 const char *
19706 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
19708 int nb, ne;
19710 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19711 gcc_unreachable ();
19713 if (mode == DImode && ne == 0)
19715 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19716 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19717 operands[3] = GEN_INT (63 - nb);
19718 if (dot)
19719 return "rld%I2cl. %0,%1,%2,%3";
19720 return "rld%I2cl %0,%1,%2,%3";
19723 if (mode == DImode && nb == 63)
19725 operands[3] = GEN_INT (63 - ne);
19726 if (dot)
19727 return "rld%I2cr. %0,%1,%2,%3";
19728 return "rld%I2cr %0,%1,%2,%3";
19731 if (mode == DImode
19732 && GET_CODE (operands[4]) != LSHIFTRT
19733 && CONST_INT_P (operands[2])
19734 && ne == INTVAL (operands[2]))
19736 operands[3] = GEN_INT (63 - nb);
19737 if (dot)
19738 return "rld%I2c. %0,%1,%2,%3";
19739 return "rld%I2c %0,%1,%2,%3";
19742 if (nb < 32 && ne < 32)
19744 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19745 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19746 operands[3] = GEN_INT (31 - nb);
19747 operands[4] = GEN_INT (31 - ne);
19748 /* This insn can also be a 64-bit rotate with mask that really makes
19749 it just a shift right (with mask); the %h below are to adjust for
19750 that situation (shift count is >= 32 in that case). */
19751 if (dot)
19752 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19753 return "rlw%I2nm %0,%1,%h2,%3,%4";
19756 gcc_unreachable ();
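/* Illustrative sketch: for the (ashift:SI x 8) & 0xffffff00 case above
   (nb = 31, ne = 8), the rlw* branch sets operands[3] = 0 and
   operands[4] = 23, emitting "rlwinm %0,%1,8,0,23".  */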
19759 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19760 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19761 ASHIFT, or LSHIFTRT) in mode MODE. */
19763 bool
19764 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19766 int nb, ne;
19768 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19769 return false;
19771 int n = GET_MODE_PRECISION (mode);
19773 int sh = INTVAL (XEXP (shift, 1));
19774 if (sh < 0 || sh >= n)
19775 return false;
19777 rtx_code code = GET_CODE (shift);
19779 /* Convert any shift by 0 to a rotate, to simplify below code. */
19780 if (sh == 0)
19781 code = ROTATE;
19783 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19784 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19785 code = ASHIFT;
19786 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19788 code = LSHIFTRT;
19789 sh = n - sh;
19792 /* DImode rotates need rldimi. */
19793 if (mode == DImode && code == ROTATE)
19794 return (ne == sh);
19796 /* SImode rotates need rlwimi. */
19797 if (mode == SImode && code == ROTATE)
19798 return (nb < 32 && ne < 32 && sh < 32);
19800 /* Wrap-around masks are only okay for rotates. */
19801 if (ne > nb)
19802 return false;
19804 /* Don't allow ASHIFT if the mask is wrong for that. */
19805 if (code == ASHIFT && ne < sh)
19806 return false;
19808 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19809 if the mask is wrong for that. */
19810 if (nb < 32 && ne < 32 && sh < 32
19811 && !(code == LSHIFTRT && nb >= 32 - sh))
19812 return true;
19814 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19815 if the mask is wrong for that. */
19816 if (code == LSHIFTRT)
19817 sh = 64 - sh;
19818 if (ne == sh)
19819 return !(code == LSHIFTRT && nb >= sh);
19821 return false;
19824 /* Return the instruction template for an insert with mask in mode MODE, with
19825 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19827 const char *
19828 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19830 int nb, ne;
19832 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19833 gcc_unreachable ();
19835 /* Prefer rldimi because rlwimi is cracked. */
19836 if (TARGET_POWERPC64
19837 && (!dot || mode == DImode)
19838 && GET_CODE (operands[4]) != LSHIFTRT
19839 && ne == INTVAL (operands[2]))
19841 operands[3] = GEN_INT (63 - nb);
19842 if (dot)
19843 return "rldimi. %0,%1,%2,%3";
19844 return "rldimi %0,%1,%2,%3";
19847 if (nb < 32 && ne < 32)
19849 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19850 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19851 operands[3] = GEN_INT (31 - nb);
19852 operands[4] = GEN_INT (31 - ne);
19853 if (dot)
19854 return "rlwimi. %0,%1,%2,%3,%4";
19855 return "rlwimi %0,%1,%2,%3,%4";
19858 gcc_unreachable ();
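/* Illustrative sketch: an insert through (ashift:SI x 16) under the
   mask 0x00ff0000 has nb = 23, ne = 16 and sh = 16, which the checks
   above accept.  For a non-dot form on TARGET_POWERPC64 the rldimi
   preference applies (ne equals the shift count), giving
   "rldimi %0,%1,16,40"; otherwise operands[3] = 8 and operands[4] = 15
   give "rlwimi %0,%1,16,8,15".  */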
19861 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19862 using two machine instructions. */
19864 bool
19865 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19867 /* There are two kinds of AND we can handle with two insns:
19868 1) those we can do with two rl* insn;
19869 2) ori[s];xori[s].
19871 We do not handle that last case yet. */
19873 /* If there is just one stretch of ones, we can do it. */
19874 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19875 return true;
19877 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19878 one insn, we can do the whole thing with two. */
19879 unsigned HOST_WIDE_INT val = INTVAL (c);
19880 unsigned HOST_WIDE_INT bit1 = val & -val;
19881 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19882 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19883 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19884 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
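/* Worked example (illustrative): for c = 0x00ff00ff in DImode, bit1 = 1,
   bit2 = 0x100 and bit3 = 0x10000, so the final test above checks the
   mask 0x00ffffff (the constant with its low hole filled in), which a
   single rldicl can do; the AND is therefore doable in two insns.  */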
19887 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19888 If EXPAND is true, split rotate-and-mask instructions we generate to
19889 their constituent parts as well (this is used during expand); if DOT
19890 is 1, make the last insn a record-form instruction clobbering the
19891 destination GPR and setting the CC reg (from operands[3]); if 2, set
19892 that GPR as well as the CC reg. */
19894 void
19895 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19897 gcc_assert (!(expand && dot));
19899 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19901 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19902 shift right. This generates better code than doing the masks without
19903 shifts, or shifting first right and then left. */
19904 int nb, ne;
19905 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19907 gcc_assert (mode == DImode);
19909 int shift = 63 - nb;
19910 if (expand)
19912 rtx tmp1 = gen_reg_rtx (DImode);
19913 rtx tmp2 = gen_reg_rtx (DImode);
19914 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19915 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19916 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19918 else
19920 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19921 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19922 emit_move_insn (operands[0], tmp);
19923 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19924 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19926 return;
19929 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19930 that does the rest. */
19931 unsigned HOST_WIDE_INT bit1 = val & -val;
19932 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19933 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19934 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19936 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19937 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19939 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19941 /* Two "no-rotate"-and-mask instructions, for SImode. */
19942 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19944 gcc_assert (mode == SImode);
19946 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19947 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19948 emit_move_insn (reg, tmp);
19949 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19950 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19951 return;
19954 gcc_assert (mode == DImode);
19956 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19957 insns; we have to do the first in SImode, because it wraps. */
19958 if (mask2 <= 0xffffffff
19959 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19961 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19962 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19963 GEN_INT (mask1));
19964 rtx reg_low = gen_lowpart (SImode, reg);
19965 emit_move_insn (reg_low, tmp);
19966 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19967 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19968 return;
19971 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19972 at the top end), rotate back and clear the other hole. */
19973 int right = exact_log2 (bit3);
19974 int left = 64 - right;
19976 /* Rotate the mask too. */
19977 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19979 if (expand)
19981 rtx tmp1 = gen_reg_rtx (DImode);
19982 rtx tmp2 = gen_reg_rtx (DImode);
19983 rtx tmp3 = gen_reg_rtx (DImode);
19984 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19985 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19986 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19987 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19989 else
19991 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19992 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19993 emit_move_insn (operands[0], tmp);
19994 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19995 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19996 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
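/* Illustrative sketch of the paths above: a single DImode stretch such
   as 0x00fffff000000000 (nb = 55, ne = 36) is shifted left by
   63 - nb = 8, masked, and shifted back right.  For 0x00ff00ff, mask1
   is the wrapping constant 0xffffffffffff00ff, applied as a 32-bit
   rlwinm-style AND, and mask2 = 0x00ffffff finishes the job.  */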
20000 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
20001 for lfq and stfq insns iff the registers are hard registers.  */
20003 int
20004 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
20006 /* We might have been passed a SUBREG. */
20007 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
20008 return 0;
20010 /* We might have been passed non-floating-point registers.  */
20011 if (!FP_REGNO_P (REGNO (reg1))
20012 || !FP_REGNO_P (REGNO (reg2)))
20013 return 0;
20015 return (REGNO (reg1) == REGNO (reg2) - 1);
20018 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
20019 addr1 and addr2 must be in consecutive memory locations
20020 (addr2 == addr1 + 8). */
20022 int
20023 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
20025 rtx addr1, addr2;
20026 unsigned int reg1, reg2;
20027 int offset1, offset2;
20029 /* The mems cannot be volatile. */
20030 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
20031 return 0;
20033 addr1 = XEXP (mem1, 0);
20034 addr2 = XEXP (mem2, 0);
20036 /* Extract an offset (if used) from the first addr. */
20037 if (GET_CODE (addr1) == PLUS)
20039 /* If not a REG, return zero. */
20040 if (GET_CODE (XEXP (addr1, 0)) != REG)
20041 return 0;
20042 else
20044 reg1 = REGNO (XEXP (addr1, 0));
20045 /* The offset must be constant! */
20046 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
20047 return 0;
20048 offset1 = INTVAL (XEXP (addr1, 1));
20051 else if (GET_CODE (addr1) != REG)
20052 return 0;
20053 else
20055 reg1 = REGNO (addr1);
20056 /* This was a simple (mem (reg)) expression. Offset is 0. */
20057 offset1 = 0;
20060 /* And now for the second addr. */
20061 if (GET_CODE (addr2) == PLUS)
20063 /* If not a REG, return zero. */
20064 if (GET_CODE (XEXP (addr2, 0)) != REG)
20065 return 0;
20066 else
20068 reg2 = REGNO (XEXP (addr2, 0));
20069 /* The offset must be constant. */
20070 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
20071 return 0;
20072 offset2 = INTVAL (XEXP (addr2, 1));
20075 else if (GET_CODE (addr2) != REG)
20076 return 0;
20077 else
20079 reg2 = REGNO (addr2);
20080 /* This was a simple (mem (reg)) expression. Offset is 0. */
20081 offset2 = 0;
20084 /* Both of these must have the same base register. */
20085 if (reg1 != reg2)
20086 return 0;
20088 /* The offset for the second addr must be 8 more than the first addr. */
20089 if (offset2 != offset1 + 8)
20090 return 0;
20092 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
20093 instructions. */
20094 return 1;
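/* For illustration: (mem (plus r3 8)) and (mem (plus r3 16)) pass these
   tests (same base register, offsets exactly 8 apart), as do
   (mem (reg r3)) and (mem (plus r3 8)); a pair with different base
   registers, or offsets 16 apart, does not.  */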
20098 rtx
20099 rs6000_secondary_memory_needed_rtx (machine_mode mode)
20101 static bool eliminated = false;
20102 rtx ret;
20104 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
20105 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20106 else
20108 rtx mem = cfun->machine->sdmode_stack_slot;
20109 gcc_assert (mem != NULL_RTX);
20111 if (!eliminated)
20113 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
20114 cfun->machine->sdmode_stack_slot = mem;
20115 eliminated = true;
20117 ret = mem;
20120 if (TARGET_DEBUG_ADDR)
20122 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
20123 GET_MODE_NAME (mode));
20124 if (!ret)
20125 fprintf (stderr, "\tNULL_RTX\n");
20126 else
20127 debug_rtx (ret);
20130 return ret;
20133 /* Return the mode to be used for memory when a secondary memory
20134 location is needed.  For SDmode values we need to use DDmode; in
20135 all other cases we can use the same mode.  */
20136 machine_mode
20137 rs6000_secondary_memory_needed_mode (machine_mode mode)
20139 if (lra_in_progress && mode == SDmode)
20140 return DDmode;
20141 return mode;
20144 static tree
20145 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
20147 /* Don't walk into types. */
20148 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
20150 *walk_subtrees = 0;
20151 return NULL_TREE;
20154 switch (TREE_CODE (*tp))
20156 case VAR_DECL:
20157 case PARM_DECL:
20158 case FIELD_DECL:
20159 case RESULT_DECL:
20160 case SSA_NAME:
20161 case REAL_CST:
20162 case MEM_REF:
20163 case VIEW_CONVERT_EXPR:
20164 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
20165 return *tp;
20166 break;
20167 default:
20168 break;
20171 return NULL_TREE;
20174 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
20175 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
20176 only work on the traditional altivec registers, note if an altivec register
20177 was chosen. */
20179 static enum rs6000_reg_type
20180 register_to_reg_type (rtx reg, bool *is_altivec)
20182 HOST_WIDE_INT regno;
20183 enum reg_class rclass;
20185 if (GET_CODE (reg) == SUBREG)
20186 reg = SUBREG_REG (reg);
20188 if (!REG_P (reg))
20189 return NO_REG_TYPE;
20191 regno = REGNO (reg);
20192 if (regno >= FIRST_PSEUDO_REGISTER)
20194 if (!lra_in_progress && !reload_in_progress && !reload_completed)
20195 return PSEUDO_REG_TYPE;
20197 regno = true_regnum (reg);
20198 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
20199 return PSEUDO_REG_TYPE;
20202 gcc_assert (regno >= 0);
20204 if (is_altivec && ALTIVEC_REGNO_P (regno))
20205 *is_altivec = true;
20207 rclass = rs6000_regno_regclass[regno];
20208 return reg_class_to_reg_type[(int)rclass];
20211 /* Helper function to return the cost of adding a TOC entry address. */
20213 static inline int
20214 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
20216 int ret;
20218 if (TARGET_CMODEL != CMODEL_SMALL)
20219 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
20221 else
20222 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
20224 return ret;
20227 /* Helper function for rs6000_secondary_reload to determine whether the memory
20228 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
20229 needs reloading. Return negative if the memory is not handled by the memory
20230 helper functions and to try a different reload method, 0 if no additional
20231 instructions are needed, and positive to give the extra cost for the
20232 memory. */
20234 static int
20235 rs6000_secondary_reload_memory (rtx addr,
20236 enum reg_class rclass,
20237 machine_mode mode)
20239 int extra_cost = 0;
20240 rtx reg, and_arg, plus_arg0, plus_arg1;
20241 addr_mask_type addr_mask;
20242 const char *type = NULL;
20243 const char *fail_msg = NULL;
20245 if (GPR_REG_CLASS_P (rclass))
20246 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20248 else if (rclass == FLOAT_REGS)
20249 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20251 else if (rclass == ALTIVEC_REGS)
20252 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20254 /* For the combined VSX_REGS, turn off Altivec AND -16. */
20255 else if (rclass == VSX_REGS)
20256 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
20257 & ~RELOAD_REG_AND_M16);
20259 /* If the register allocator hasn't made up its mind yet on the register
20260 class to use, settle on defaults to use. */
20261 else if (rclass == NO_REGS)
20263 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
20264 & ~RELOAD_REG_AND_M16);
20266 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
20267 addr_mask &= ~(RELOAD_REG_INDEXED
20268 | RELOAD_REG_PRE_INCDEC
20269 | RELOAD_REG_PRE_MODIFY);
20272 else
20273 addr_mask = 0;
20275 /* If the register isn't valid in this register class, just return now. */
20276 if ((addr_mask & RELOAD_REG_VALID) == 0)
20278 if (TARGET_DEBUG_ADDR)
20280 fprintf (stderr,
20281 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20282 "not valid in class\n",
20283 GET_MODE_NAME (mode), reg_class_names[rclass]);
20284 debug_rtx (addr);
20287 return -1;
20290 switch (GET_CODE (addr))
20292 /* Does the register class support auto update forms for this mode? We
20293 don't need a scratch register, since the powerpc only supports
20294 PRE_INC, PRE_DEC, and PRE_MODIFY. */
20295 case PRE_INC:
20296 case PRE_DEC:
20297 reg = XEXP (addr, 0);
20298 if (!base_reg_operand (addr, GET_MODE (reg)))
20300 fail_msg = "no base register #1";
20301 extra_cost = -1;
20304 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20306 extra_cost = 1;
20307 type = "update";
20309 break;
20311 case PRE_MODIFY:
20312 reg = XEXP (addr, 0);
20313 plus_arg1 = XEXP (addr, 1);
20314 if (!base_reg_operand (reg, GET_MODE (reg))
20315 || GET_CODE (plus_arg1) != PLUS
20316 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
20318 fail_msg = "bad PRE_MODIFY";
20319 extra_cost = -1;
20322 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20324 extra_cost = 1;
20325 type = "update";
20327 break;
20329 /* Do we need to simulate AND -16 to clear the bottom address bits used
20330 in VMX load/stores? Only allow the AND for vector sizes. */
20331 case AND:
20332 and_arg = XEXP (addr, 0);
20333 if (GET_MODE_SIZE (mode) != 16
20334 || GET_CODE (XEXP (addr, 1)) != CONST_INT
20335 || INTVAL (XEXP (addr, 1)) != -16)
20337 fail_msg = "bad Altivec AND #1";
20338 extra_cost = -1;
20341 if (rclass != ALTIVEC_REGS)
20343 if (legitimate_indirect_address_p (and_arg, false))
20344 extra_cost = 1;
20346 else if (legitimate_indexed_address_p (and_arg, false))
20347 extra_cost = 2;
20349 else
20351 fail_msg = "bad Altivec AND #2";
20352 extra_cost = -1;
20355 type = "and";
20357 break;
20359 /* If this is an indirect address, make sure it is a base register. */
20360 case REG:
20361 case SUBREG:
20362 if (!legitimate_indirect_address_p (addr, false))
20364 extra_cost = 1;
20365 type = "move";
20367 break;
20369 /* If this is an indexed address, make sure the register class can handle
20370 indexed addresses for this mode. */
20371 case PLUS:
20372 plus_arg0 = XEXP (addr, 0);
20373 plus_arg1 = XEXP (addr, 1);
20375 /* (plus (plus (reg) (constant)) (constant)) is generated during
20376 push_reload processing, so handle it now. */
20377 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
20379 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20381 extra_cost = 1;
20382 type = "offset";
20386 /* (plus (plus (reg) (constant)) (reg)) is also generated during
20387 push_reload processing, so handle it now. */
20388 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
20390 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20392 extra_cost = 1;
20393 type = "indexed #2";
20397 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
20399 fail_msg = "no base register #2";
20400 extra_cost = -1;
20403 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
20405 if ((addr_mask & RELOAD_REG_INDEXED) == 0
20406 || !legitimate_indexed_address_p (addr, false))
20408 extra_cost = 1;
20409 type = "indexed";
20413 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
20414 && CONST_INT_P (plus_arg1))
20416 if (!quad_address_offset_p (INTVAL (plus_arg1)))
20418 extra_cost = 1;
20419 type = "vector d-form offset";
20423 /* Make sure the register class can handle offset addresses. */
20424 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20426 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20428 extra_cost = 1;
20429 type = "offset #2";
20433 else
20435 fail_msg = "bad PLUS";
20436 extra_cost = -1;
20439 break;
20441 case LO_SUM:
20442 /* Quad offsets are restricted and can't handle normal addresses. */
20443 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20445 extra_cost = -1;
20446 type = "vector d-form lo_sum";
20449 else if (!legitimate_lo_sum_address_p (mode, addr, false))
20451 fail_msg = "bad LO_SUM";
20452 extra_cost = -1;
20455 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20457 extra_cost = 1;
20458 type = "lo_sum";
20460 break;
20462 /* Static addresses need to create a TOC entry. */
20463 case CONST:
20464 case SYMBOL_REF:
20465 case LABEL_REF:
20466 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20468 extra_cost = -1;
20469 type = "vector d-form lo_sum #2";
20472 else
20474 type = "address";
20475 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
20477 break;
20479 /* TOC references look like offsettable memory.  */
20480 case UNSPEC:
20481 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
20483 fail_msg = "bad UNSPEC";
20484 extra_cost = -1;
20487 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20489 extra_cost = -1;
20490 type = "vector d-form lo_sum #3";
20493 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20495 extra_cost = 1;
20496 type = "toc reference";
20498 break;
20500 default:
20502 fail_msg = "bad address";
20503 extra_cost = -1;
20507 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
20509 if (extra_cost < 0)
20510 fprintf (stderr,
20511 "rs6000_secondary_reload_memory error: mode = %s, "
20512 "class = %s, addr_mask = '%s', %s\n",
20513 GET_MODE_NAME (mode),
20514 reg_class_names[rclass],
20515 rs6000_debug_addr_mask (addr_mask, false),
20516 (fail_msg != NULL) ? fail_msg : "<bad address>");
20518 else
20519 fprintf (stderr,
20520 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20521 "addr_mask = '%s', extra cost = %d, %s\n",
20522 GET_MODE_NAME (mode),
20523 reg_class_names[rclass],
20524 rs6000_debug_addr_mask (addr_mask, false),
20525 extra_cost,
20526 (type) ? type : "<none>");
20528 debug_rtx (addr);
20531 return extra_cost;
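/* For illustration: reloading a 16-byte vector whose address is
   (and (plus r9 r10) -16) into a non-Altivec class takes the AND case
   above with extra_cost = 2 (the indexed sum must be materialized
   before the masked address can be used), while the same address costs
   nothing extra for ALTIVEC_REGS.  */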
20534 /* Helper function for rs6000_secondary_reload to return true if a move to a
20535 different register class is really a simple move.  */
20537 static bool
20538 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20539 enum rs6000_reg_type from_type,
20540 machine_mode mode)
20542 int size = GET_MODE_SIZE (mode);
20544 /* Add support for various direct moves available. In this function, we only
20545 look at cases where we don't need any extra registers, and one or more
20546 simple move insns are issued. Originally small integers are not allowed
20547 in FPR/VSX registers. Single precision binary floating is not a simple
20548 move because we need to convert to the single precision memory layout.
20549 The 4-byte SDmode can be moved. TDmode values are disallowed since they
20550 need special direct move handling, which we do not support yet. */
20551 if (TARGET_DIRECT_MOVE
20552 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20553 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
20555 if (TARGET_POWERPC64)
20557 /* ISA 2.07: MTVSRD or MFVSRD.  */
20558 if (size == 8)
20559 return true;
20561 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
20562 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
20563 return true;
20566 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20567 if (TARGET_VSX_SMALL_INTEGER && mode == SImode)
20568 return true;
20570 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20571 if (mode == SDmode)
20572 return true;
20575 /* Power6+: MFTGPR or MFFGPR. */
20576 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
20577 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
20578 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20579 return true;
20581 /* Move to/from SPR. */
20582 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
20583 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
20584 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20585 return true;
20587 return false;
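/* For illustration: a DImode or DFmode value (size 8) moves between a
   GPR and a VSX register with a single mtvsrd/mfvsrd when
   TARGET_DIRECT_MOVE and TARGET_POWERPC64 are set, so this returns
   true; TDmode never takes the 16-byte path because of the explicit
   mode check above.  */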
20590 /* Direct move helper function for rs6000_secondary_reload, handle all of the
20591 special direct moves that involve allocating an extra register.  If such
20592 a move is supported, record the insn code and extra cost in SRI and
20593 return true; otherwise return false.  */
20595 static bool
20596 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
20597 enum rs6000_reg_type from_type,
20598 machine_mode mode,
20599 secondary_reload_info *sri,
20600 bool altivec_p)
20602 bool ret = false;
20603 enum insn_code icode = CODE_FOR_nothing;
20604 int cost = 0;
20605 int size = GET_MODE_SIZE (mode);
20607 if (TARGET_POWERPC64 && size == 16)
20609 /* Handle moving 128-bit values from GPRs to VSX registers on
20610 ISA 2.07 (power8, power9) when running in 64-bit mode using
20611 XXPERMDI to glue the two 64-bit values back together. */
20612 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20614 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
20615 icode = reg_addr[mode].reload_vsx_gpr;
20618 /* Handle moving 128-bit values from VSX registers to GPRs on
20619 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
20620 bottom 64-bit value. */
20621 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20623 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
20624 icode = reg_addr[mode].reload_gpr_vsx;
20628 else if (TARGET_POWERPC64 && mode == SFmode)
20630 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20632 cost = 3; /* xscvdpspn, mfvsrd, and. */
20633 icode = reg_addr[mode].reload_gpr_vsx;
20636 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20638 cost = 2; /* mtvsrz, xscvspdpn. */
20639 icode = reg_addr[mode].reload_vsx_gpr;
20643 else if (!TARGET_POWERPC64 && size == 8)
20645 /* Handle moving 64-bit values from GPRs to floating point registers on
20646 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
20647 32-bit values back together. Altivec register classes must be handled
20648 specially since a different instruction is used, and the secondary
20649 reload support requires a single instruction class in the scratch
20650 register constraint. However, right now TFmode is not allowed in
20651 Altivec registers, so the pattern will never match. */
20652 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
20654 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
20655 icode = reg_addr[mode].reload_fpr_gpr;
20659 if (icode != CODE_FOR_nothing)
20661 ret = true;
20662 if (sri)
20664 sri->icode = icode;
20665 sri->extra_cost = cost;
20669 return ret;
20672 /* Return whether a move between two register classes can be done either
20673 directly (simple move) or via a pattern that uses a single extra temporary
20674 (using ISA 2.07's direct move in this case).  */
20676 static bool
20677 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
20678 enum rs6000_reg_type from_type,
20679 machine_mode mode,
20680 secondary_reload_info *sri,
20681 bool altivec_p)
20683 /* Fall back to load/store reloads if either type is not a register. */
20684 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
20685 return false;
20687 /* If we haven't allocated registers yet, assume the move can be done for the
20688 standard register types. */
20689 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
20690 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
20691 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
20692 return true;
20694 /* A move within the same set of registers is a simple move for
20695 non-specialized registers.  */
20696 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
20697 return true;
20699 /* Check whether a simple move can be done directly. */
20700 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
20702 if (sri)
20704 sri->icode = CODE_FOR_nothing;
20705 sri->extra_cost = 0;
20707 return true;
20710 /* Now check if we can do it in a few steps. */
20711 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
20712 altivec_p);
20715 /* Inform reload about cases where moving X with a mode MODE to a register in
20716 RCLASS requires an extra scratch or immediate register. Return the class
20717 needed for the immediate register.
20719 For VSX and Altivec, we may need a register to convert sp+offset into
20720 reg+sp.
20722 For misaligned 64-bit gpr loads and stores we need a register to
20723 convert an offset address to indirect. */
20725 static reg_class_t
20726 rs6000_secondary_reload (bool in_p,
20727 rtx x,
20728 reg_class_t rclass_i,
20729 machine_mode mode,
20730 secondary_reload_info *sri)
20732 enum reg_class rclass = (enum reg_class) rclass_i;
20733 reg_class_t ret = ALL_REGS;
20734 enum insn_code icode;
20735 bool default_p = false;
20736 bool done_p = false;
20738 /* Allow subreg of memory before/during reload. */
20739 bool memory_p = (MEM_P (x)
20740 || (!reload_completed && GET_CODE (x) == SUBREG
20741 && MEM_P (SUBREG_REG (x))));
20743 sri->icode = CODE_FOR_nothing;
20744 sri->t_icode = CODE_FOR_nothing;
20745 sri->extra_cost = 0;
20746 icode = ((in_p)
20747 ? reg_addr[mode].reload_load
20748 : reg_addr[mode].reload_store);
20750 if (REG_P (x) || register_operand (x, mode))
20752 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20753 bool altivec_p = (rclass == ALTIVEC_REGS);
20754 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20756 if (!in_p)
20757 std::swap (to_type, from_type);
20759 /* Can we do a direct move of some sort? */
20760 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20761 altivec_p))
20763 icode = (enum insn_code)sri->icode;
20764 default_p = false;
20765 done_p = true;
20766 ret = NO_REGS;
20770 /* Make sure 0.0 is not reloaded or forced into memory. */
20771 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20773 ret = NO_REGS;
20774 default_p = false;
20775 done_p = true;
20778 /* If this is a scalar floating point value and we want to load it into the
20779 traditional Altivec registers, do it via a move via a traditional floating
20780 point register, unless we have D-form addressing. Also make sure that
20781 non-zero constants use a FPR. */
20782 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20783 && !mode_supports_vmx_dform (mode)
20784 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20785 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20787 ret = FLOAT_REGS;
20788 default_p = false;
20789 done_p = true;
20792 /* Handle reload of load/stores if we have reload helper functions. */
20793 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20795 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20796 mode);
20798 if (extra_cost >= 0)
20800 done_p = true;
20801 ret = NO_REGS;
20802 if (extra_cost > 0)
20804 sri->extra_cost = extra_cost;
20805 sri->icode = icode;
20810 /* Handle unaligned loads and stores of integer registers. */
20811 if (!done_p && TARGET_POWERPC64
20812 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20813 && memory_p
20814 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20816 rtx addr = XEXP (x, 0);
20817 rtx off = address_offset (addr);
20819 if (off != NULL_RTX)
20821 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20822 unsigned HOST_WIDE_INT offset = INTVAL (off);
20824 /* We need a secondary reload when our legitimate_address_p
20825 says the address is good (as otherwise the entire address
20826 will be reloaded), and the offset is not a multiple of
20827 four or we have an address wrap. Address wrap will only
20828 occur for LO_SUMs since legitimate_offset_address_p
20829 rejects addresses for 16-byte mems that will wrap. */
20830 if (GET_CODE (addr) == LO_SUM
20831 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20832 && ((offset & 3) != 0
20833 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20834 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20835 && (offset & 3) != 0))
20837 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20838 if (in_p)
20839 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20840 : CODE_FOR_reload_di_load);
20841 else
20842 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20843 : CODE_FOR_reload_di_store);
20844 sri->extra_cost = 2;
20845 ret = NO_REGS;
20846 done_p = true;
20848 else
20849 default_p = true;
20851 else
20852 default_p = true;
20855 if (!done_p && !TARGET_POWERPC64
20856 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20857 && memory_p
20858 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20860 rtx addr = XEXP (x, 0);
20861 rtx off = address_offset (addr);
20863 if (off != NULL_RTX)
20865 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20866 unsigned HOST_WIDE_INT offset = INTVAL (off);
20868 /* We need a secondary reload when our legitimate_address_p
20869 says the address is good (as otherwise the entire address
20870 will be reloaded), and we have a wrap.
20872 legitimate_lo_sum_address_p allows LO_SUM addresses to
20873 have any offset so test for wrap in the low 16 bits.
20875 legitimate_offset_address_p checks for the range
20876 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20877 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20878 [0x7ff4,0x7fff] respectively, so test for the
20879 intersection of these ranges, [0x7ffc,0x7fff] and
20880 [0x7ff4,0x7ff7] respectively.
20882 Note that the address we see here may have been
20883 manipulated by legitimize_reload_address. */
20884 if (GET_CODE (addr) == LO_SUM
20885 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20886 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20888 if (in_p)
20889 sri->icode = CODE_FOR_reload_si_load;
20890 else
20891 sri->icode = CODE_FOR_reload_si_store;
20892 sri->extra_cost = 2;
20893 ret = NO_REGS;
20894 done_p = true;
20896 else
20897 default_p = true;
20899 else
20900 default_p = true;
20903 if (!done_p)
20904 default_p = true;
20906 if (default_p)
20907 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20909 gcc_assert (ret != ALL_REGS);
20911 if (TARGET_DEBUG_ADDR)
20913 fprintf (stderr,
20914 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20915 "mode = %s",
20916 reg_class_names[ret],
20917 in_p ? "true" : "false",
20918 reg_class_names[rclass],
20919 GET_MODE_NAME (mode));
20921 if (reload_completed)
20922 fputs (", after reload", stderr);
20924 if (!done_p)
20925 fputs (", done_p not set", stderr);
20927 if (default_p)
20928 fputs (", default secondary reload", stderr);
20930 if (sri->icode != CODE_FOR_nothing)
20931 fprintf (stderr, ", reload func = %s, extra cost = %d",
20932 insn_data[sri->icode].name, sri->extra_cost);
20934 else if (sri->extra_cost > 0)
20935 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20937 fputs ("\n", stderr);
20938 debug_rtx (x);
20941 return ret;
20944 /* Better tracing for rs6000_secondary_reload_inner. */
20946 static void
20947 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20948 bool store_p)
20950 rtx set, clobber;
20952 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20954 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20955 store_p ? "store" : "load");
20957 if (store_p)
20958 set = gen_rtx_SET (mem, reg);
20959 else
20960 set = gen_rtx_SET (reg, mem);
20962 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20963 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20966 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20967 ATTRIBUTE_NORETURN;
20969 static void
20970 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20971 bool store_p)
20973 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20974 gcc_unreachable ();
20977 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20978 reload helper functions. These were identified in
20979 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20980 reload, it calls the insns:
20981 reload_<RELOAD:mode>_<P:mptrsize>_store
20982 reload_<RELOAD:mode>_<P:mptrsize>_load
20984 which in turn calls this function, to do whatever is necessary to create
20985 valid addresses. */
20987 void
20988 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20990 int regno = true_regnum (reg);
20991 machine_mode mode = GET_MODE (reg);
20992 addr_mask_type addr_mask;
20993 rtx addr;
20994 rtx new_addr;
20995 rtx op_reg, op0, op1;
20996 rtx and_op;
20997 rtx cc_clobber;
20998 rtvec rv;
21000 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
21001 || !base_reg_operand (scratch, GET_MODE (scratch)))
21002 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21004 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
21005 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21007 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
21008 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21010 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
21011 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21013 else
21014 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21016 /* Make sure the mode is valid in this register class. */
21017 if ((addr_mask & RELOAD_REG_VALID) == 0)
21018 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21020 if (TARGET_DEBUG_ADDR)
21021 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
21023 new_addr = addr = XEXP (mem, 0);
21024 switch (GET_CODE (addr))
21026 /* Does the register class support auto update forms for this mode? If
21027 not, do the update now. We don't need a scratch register, since the
21028 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
21029 case PRE_INC:
21030 case PRE_DEC:
21031 op_reg = XEXP (addr, 0);
21032 if (!base_reg_operand (op_reg, Pmode))
21033 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21035 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21037 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
21038 new_addr = op_reg;
21040 break;
21042 case PRE_MODIFY:
21043 op0 = XEXP (addr, 0);
21044 op1 = XEXP (addr, 1);
21045 if (!base_reg_operand (op0, Pmode)
21046 || GET_CODE (op1) != PLUS
21047 || !rtx_equal_p (op0, XEXP (op1, 0)))
21048 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21050 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21052 emit_insn (gen_rtx_SET (op0, op1));
21053 new_addr = reg;
21055 break;
21057 /* Do we need to simulate AND -16 to clear the bottom address bits used
21058 in VMX load/stores? */
21059 case AND:
21060 op0 = XEXP (addr, 0);
21061 op1 = XEXP (addr, 1);
21062 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
21064 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
21065 op_reg = op0;
21067 else if (GET_CODE (op1) == PLUS)
21069 emit_insn (gen_rtx_SET (scratch, op1));
21070 op_reg = scratch;
21073 else
21074 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21076 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
21077 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
21078 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
21079 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
21080 new_addr = scratch;
21082 break;
21084 /* If this is an indirect address, make sure it is a base register. */
21085 case REG:
21086 case SUBREG:
21087 if (!base_reg_operand (addr, GET_MODE (addr)))
21089 emit_insn (gen_rtx_SET (scratch, addr));
21090 new_addr = scratch;
21092 break;
21094 /* If this is an indexed address, make sure the register class can handle
21095 indexed addresses for this mode. */
21096 case PLUS:
21097 op0 = XEXP (addr, 0);
21098 op1 = XEXP (addr, 1);
21099 if (!base_reg_operand (op0, Pmode))
21100 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21102 else if (int_reg_operand (op1, Pmode))
21104 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21106 emit_insn (gen_rtx_SET (scratch, addr));
21107 new_addr = scratch;
21111 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
21113 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
21114 || !quad_address_p (addr, mode, false))
21116 emit_insn (gen_rtx_SET (scratch, addr));
21117 new_addr = scratch;
21121 /* Make sure the register class can handle offset addresses. */
21122 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21124 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21126 emit_insn (gen_rtx_SET (scratch, addr));
21127 new_addr = scratch;
21131 else
21132 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21134 break;
21136 case LO_SUM:
21137 op0 = XEXP (addr, 0);
21138 op1 = XEXP (addr, 1);
21139 if (!base_reg_operand (op0, Pmode))
21140 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21142 else if (int_reg_operand (op1, Pmode))
21144 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21146 emit_insn (gen_rtx_SET (scratch, addr));
21147 new_addr = scratch;
21151 /* Quad offsets are restricted and can't handle normal addresses. */
21152 else if (mode_supports_vsx_dform_quad (mode))
21154 emit_insn (gen_rtx_SET (scratch, addr));
21155 new_addr = scratch;
21158 /* Make sure the register class can handle offset addresses. */
21159 else if (legitimate_lo_sum_address_p (mode, addr, false))
21161 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21163 emit_insn (gen_rtx_SET (scratch, addr));
21164 new_addr = scratch;
21168 else
21169 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21171 break;
21173 case SYMBOL_REF:
21174 case CONST:
21175 case LABEL_REF:
21176 rs6000_emit_move (scratch, addr, Pmode);
21177 new_addr = scratch;
21178 break;
21180 default:
21181 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21184 /* Adjust the address if it changed. */
21185 if (addr != new_addr)
21187 mem = replace_equiv_address_nv (mem, new_addr);
21188 if (TARGET_DEBUG_ADDR)
21189 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
21192 /* Now create the move. */
21193 if (store_p)
21194 emit_insn (gen_rtx_SET (mem, reg));
21195 else
21196 emit_insn (gen_rtx_SET (reg, mem));
21198 return;
21201 /* Convert reloads involving 64-bit gprs and misaligned offset
21202 addressing, or multiple 32-bit gprs and offsets that are too large,
21203 to use indirect addressing. */
21205 void
21206 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
21208 int regno = true_regnum (reg);
21209 enum reg_class rclass;
21210 rtx addr;
21211 rtx scratch_or_premodify = scratch;
21213 if (TARGET_DEBUG_ADDR)
21215 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
21216 store_p ? "store" : "load");
21217 fprintf (stderr, "reg:\n");
21218 debug_rtx (reg);
21219 fprintf (stderr, "mem:\n");
21220 debug_rtx (mem);
21221 fprintf (stderr, "scratch:\n");
21222 debug_rtx (scratch);
21225 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
21226 gcc_assert (GET_CODE (mem) == MEM);
21227 rclass = REGNO_REG_CLASS (regno);
21228 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
21229 addr = XEXP (mem, 0);
21231 if (GET_CODE (addr) == PRE_MODIFY)
21233 gcc_assert (REG_P (XEXP (addr, 0))
21234 && GET_CODE (XEXP (addr, 1)) == PLUS
21235 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
21236 scratch_or_premodify = XEXP (addr, 0);
21237 if (!HARD_REGISTER_P (scratch_or_premodify))
21238 /* If we have a pseudo here then reload will have arranged
21239 to have it replaced, but only in the original insn.
21240 Use the replacement here too. */
21241 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21243 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
21244 expressions from the original insn, without unsharing them.
21245 Any RTL that points into the original insn will of course
21246 have register replacements applied. That is why we don't
21247 need to look for replacements under the PLUS. */
21248 addr = XEXP (addr, 1);
21250 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
21252 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
21254 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
21256 /* Now create the move. */
21257 if (store_p)
21258 emit_insn (gen_rtx_SET (mem, reg));
21259 else
21260 emit_insn (gen_rtx_SET (reg, mem));
21262 return;
21265 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
21266 this function has any SDmode references. If we are on a power7 or later, we
21267 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
21268 can load/store the value. */
21270 static void
21271 rs6000_alloc_sdmode_stack_slot (void)
21273 tree t;
21274 basic_block bb;
21275 gimple_stmt_iterator gsi;
21277 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
21278 /* We use a different approach for dealing with the secondary
21279 memory in LRA. */
21280 if (ira_use_lra_p)
21281 return;
21283 if (TARGET_NO_SDMODE_STACK)
21284 return;
21286 FOR_EACH_BB_FN (bb, cfun)
21287 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21289 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
21290 if (ret)
21292 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21293 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21294 SDmode, 0);
21295 return;
21299 /* Check for any SDmode parameters of the function. */
21300 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
21302 if (TREE_TYPE (t) == error_mark_node)
21303 continue;
21305 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
21306 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
21308 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21309 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21310 SDmode, 0);
21311 return;
21316 static void
21317 rs6000_instantiate_decls (void)
21319 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
21320 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
21323 /* Given an rtx X being reloaded into a reg required to be
21324 in class CLASS, return the class of reg to actually use.
21325 In general this is just CLASS; but on some machines
21326 in some cases it is preferable to use a more restrictive class.
21328 On the RS/6000, we have to return NO_REGS when we want to reload a
21329 floating-point CONST_DOUBLE to force it to be copied to memory.
21331 We also don't want to reload integer values into floating-point
21332 registers if we can at all help it. In fact, this can
21333 cause reload to die, if it tries to generate a reload of CTR
21334 into a FP register and discovers it doesn't have the memory location
21335 required.
21337 ??? Would it be a good idea to have reload do the converse, that is
21338 try to reload floating modes into FP registers if possible?  */
21341 static enum reg_class
21342 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
21344 machine_mode mode = GET_MODE (x);
21345 bool is_constant = CONSTANT_P (x);
21347 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
21348 reload class for it. */
21349 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21350 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
21351 return NO_REGS;
21353 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
21354 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
21355 return NO_REGS;
21357 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
21358 the reloading of address expressions using PLUS into floating point
21359 registers. */
21360 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
21362 if (is_constant)
21364 /* Zero is always allowed in all VSX registers. */
21365 if (x == CONST0_RTX (mode))
21366 return rclass;
21368 /* If this is a vector constant that can be formed with a few Altivec
21369 instructions, we want altivec registers. */
21370 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
21371 return ALTIVEC_REGS;
21373 /* Force constant to memory. */
21374 return NO_REGS;
21377 /* D-form addressing can easily reload the value. */
21378 if (mode_supports_vmx_dform (mode)
21379 || mode_supports_vsx_dform_quad (mode))
21380 return rclass;
21382 /* If this is a scalar floating point value and we don't have D-form
21383 addressing, prefer the traditional floating point registers so that we
21384 can use D-form (register+offset) addressing. */
21385 if (rclass == VSX_REGS
21386 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
21387 return FLOAT_REGS;
21389 /* Prefer the Altivec registers if Altivec is handling the vector
21390 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
21391 loads. */
21392 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
21393 || mode == V1TImode)
21394 return ALTIVEC_REGS;
21396 return rclass;
21399 if (is_constant || GET_CODE (x) == PLUS)
21401 if (reg_class_subset_p (GENERAL_REGS, rclass))
21402 return GENERAL_REGS;
21403 if (reg_class_subset_p (BASE_REGS, rclass))
21404 return BASE_REGS;
21405 return NO_REGS;
21408 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21409 return GENERAL_REGS;
21411 return rclass;
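/* For illustration: a non-zero DFmode constant requested in VSX_REGS is
   sent to NO_REGS above (forcing it to memory), while a DFmode pseudo
   without D-form VSX addressing is steered to FLOAT_REGS so that
   reg+offset loads and stores remain available.  */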
21414 /* Debug version of rs6000_preferred_reload_class. */
21415 static enum reg_class
21416 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
21418 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
21420 fprintf (stderr,
21421 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
21422 "mode = %s, x:\n",
21423 reg_class_names[ret], reg_class_names[rclass],
21424 GET_MODE_NAME (GET_MODE (x)));
21425 debug_rtx (x);
21427 return ret;
21430 /* If we are copying between FP or AltiVec registers and anything else, we need
21431 a memory location.  The exception is when we are targeting ppc64 and
21432 direct moves between the FPRs and GPRs are available.  Also, under VSX, you
21433 can copy vector registers from the FP register set to the Altivec register
21434 set and vice versa. */
21436 static bool
21437 rs6000_secondary_memory_needed (enum reg_class from_class,
21438 enum reg_class to_class,
21439 machine_mode mode)
21441 enum rs6000_reg_type from_type, to_type;
21442 bool altivec_p = ((from_class == ALTIVEC_REGS)
21443 || (to_class == ALTIVEC_REGS));
21445 /* If a simple/direct move is available, we don't need secondary memory.  */
21446 from_type = reg_class_to_reg_type[(int)from_class];
21447 to_type = reg_class_to_reg_type[(int)to_class];
21449 if (rs6000_secondary_reload_move (to_type, from_type, mode,
21450 (secondary_reload_info *)0, altivec_p))
21451 return false;
21453 /* If we have a floating point or vector register class, we need to use
21454 memory to transfer the data. */
21455 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21456 return true;
21458 return false;
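/* Example (illustrative): a DImode copy between FLOAT_REGS and
   GENERAL_REGS needs a stack slot unless direct moves (mfvsrd/mtvsrd)
   are available, in which case rs6000_secondary_reload_move succeeds
   and no secondary memory is required.  */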
21461 /* Debug version of rs6000_secondary_memory_needed. */
21462 static bool
21463 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21464 enum reg_class to_class,
21465 machine_mode mode)
21467 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21469 fprintf (stderr,
21470 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21471 "to_class = %s, mode = %s\n",
21472 ret ? "true" : "false",
21473 reg_class_names[from_class],
21474 reg_class_names[to_class],
21475 GET_MODE_NAME (mode));
21477 return ret;
21480 /* Return the register class of a scratch register needed to copy IN into
21481 or out of a register in RCLASS in MODE. If it can be done directly,
21482 NO_REGS is returned. */
21484 static enum reg_class
21485 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
21486 rtx in)
21488 int regno;
21490 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
21491 #if TARGET_MACHO
21492 && MACHOPIC_INDIRECT
21493 #endif
21496 /* We cannot copy a symbolic operand directly into anything
21497 other than BASE_REGS for TARGET_ELF. So indicate that a
21498 register from BASE_REGS is needed as an intermediate
21499 register.
21501 On Darwin, pic addresses require a load from memory, which
21502 needs a base register. */
21503 if (rclass != BASE_REGS
21504 && (GET_CODE (in) == SYMBOL_REF
21505 || GET_CODE (in) == HIGH
21506 || GET_CODE (in) == LABEL_REF
21507 || GET_CODE (in) == CONST))
21508 return BASE_REGS;
21511 if (GET_CODE (in) == REG)
21513 regno = REGNO (in);
21514 if (regno >= FIRST_PSEUDO_REGISTER)
21516 regno = true_regnum (in);
21517 if (regno >= FIRST_PSEUDO_REGISTER)
21518 regno = -1;
21521 else if (GET_CODE (in) == SUBREG)
21523 regno = true_regnum (in);
21524 if (regno >= FIRST_PSEUDO_REGISTER)
21525 regno = -1;
21527 else
21528 regno = -1;
21530 /* If we have VSX register moves, prefer moving scalar values between
21531 Altivec registers and GPRs by going via an FPR (and then via memory)
21532 instead of reloading the secondary memory address for Altivec moves. */
21533 if (TARGET_VSX
21534 && GET_MODE_SIZE (mode) < 16
21535 && !mode_supports_vmx_dform (mode)
21536 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
21537 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
21538 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21539 && (regno >= 0 && INT_REGNO_P (regno)))))
21540 return FLOAT_REGS;
21542 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
21543 into anything. */
21544 if (rclass == GENERAL_REGS || rclass == BASE_REGS
21545 || (regno >= 0 && INT_REGNO_P (regno)))
21546 return NO_REGS;
21548 /* Constants, memory, and VSX registers can go into VSX registers (both the
21549 traditional floating point and the altivec registers). */
21550 if (rclass == VSX_REGS
21551 && (regno == -1 || VSX_REGNO_P (regno)))
21552 return NO_REGS;
21554 /* Constants, memory, and FP registers can go into FP registers. */
21555 if ((regno == -1 || FP_REGNO_P (regno))
21556 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21557 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21559 /* Memory, and AltiVec registers can go into AltiVec registers. */
21560 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21561 && rclass == ALTIVEC_REGS)
21562 return NO_REGS;
21564 /* We can copy among the CR registers. */
21565 if ((rclass == CR_REGS || rclass == CR0_REGS)
21566 && regno >= 0 && CR_REGNO_P (regno))
21567 return NO_REGS;
21569 /* Otherwise, we need GENERAL_REGS. */
21570 return GENERAL_REGS;
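/* Example (illustrative): on ELF, reloading a SYMBOL_REF into
   FLOAT_REGS returns BASE_REGS because the address must first be
   formed in a base register, while copying one FPR to another in,
   say, DFmode returns NO_REGS since no scratch register is needed.  */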
21573 /* Debug version of rs6000_secondary_reload_class. */
21574 static enum reg_class
21575 rs6000_debug_secondary_reload_class (enum reg_class rclass,
21576 machine_mode mode, rtx in)
21578 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
21579 fprintf (stderr,
21580 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
21581 "mode = %s, input rtx:\n",
21582 reg_class_names[ret], reg_class_names[rclass],
21583 GET_MODE_NAME (mode));
21584 debug_rtx (in);
21586 return ret;
21589 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
21591 static bool
21592 rs6000_cannot_change_mode_class (machine_mode from,
21593 machine_mode to,
21594 enum reg_class rclass)
21596 unsigned from_size = GET_MODE_SIZE (from);
21597 unsigned to_size = GET_MODE_SIZE (to);
21599 if (from_size != to_size)
21601 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21603 if (reg_classes_intersect_p (xclass, rclass))
21605 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21606 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21607 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
21608 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
21610 /* Don't allow 64-bit types to overlap with 128-bit types that take a
21611 single register under VSX because the scalar part of the register
21612 is in the upper 64 bits, and not the lower 64 bits. Types like
21613 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
21614 IEEE floating point can't overlap, and neither can small
21615 values. */
21617 if (to_float128_vector_p && from_float128_vector_p)
21618 return false;
21620 else if (to_float128_vector_p || from_float128_vector_p)
21621 return true;
21623 /* TDmode in floating-mode registers must always go into a register
21624 pair with the most significant word in the even-numbered register
21625 to match ISA requirements. In little-endian mode, this does not
21626 match subreg numbering, so we cannot allow subregs. */
21627 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21628 return true;
21630 if (from_size < 8 || to_size < 8)
21631 return true;
21633 if (from_size == 8 && (8 * to_nregs) != to_size)
21634 return true;
21636 if (to_size == 8 && (8 * from_nregs) != from_size)
21637 return true;
21639 return false;
21641 else
21642 return false;
21645 if (TARGET_E500_DOUBLE
21646 && ((((to) == DFmode) + ((from) == DFmode)) == 1
21647 || (((to) == TFmode) + ((from) == TFmode)) == 1
21648 || (((to) == IFmode) + ((from) == IFmode)) == 1
21649 || (((to) == KFmode) + ((from) == KFmode)) == 1
21650 || (((to) == DDmode) + ((from) == DDmode)) == 1
21651 || (((to) == TDmode) + ((from) == TDmode)) == 1
21652 || (((to) == DImode) + ((from) == DImode)) == 1))
21653 return true;
21655 /* Since the VSX register set includes traditional floating point registers
21656 and altivec registers, just check for the size being different instead of
21657 trying to check whether the modes are vector modes. Otherwise it won't
21658 allow, say, DF and DI to change classes. For types like TFmode and TDmode
21659 that take 2 64-bit registers, rather than a single 128-bit register, don't
21660 allow subregs of those types to other 128-bit types. */
21661 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21663 unsigned num_regs = (from_size + 15) / 16;
21664 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21665 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21666 return true;
21668 return (from_size != 8 && from_size != 16);
21671 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21672 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21673 return true;
21675 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
21676 && reg_classes_intersect_p (GENERAL_REGS, rclass))
21677 return true;
21679 return false;
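/* Illustrative cases (assumed from the checks above): with VSX,
   (subreg:DI (reg:DF)) is allowed since both modes occupy one 64-bit
   register, but (subreg:DI (reg:KF)) is rejected because the scalar
   part of a 128-bit IEEE value lives in the upper 64 bits of the VSX
   register.  */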
21682 /* Debug version of rs6000_cannot_change_mode_class. */
21683 static bool
21684 rs6000_debug_cannot_change_mode_class (machine_mode from,
21685 machine_mode to,
21686 enum reg_class rclass)
21688 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
21690 fprintf (stderr,
21691 "rs6000_cannot_change_mode_class, return %s, from = %s, "
21692 "to = %s, rclass = %s\n",
21693 ret ? "true" : "false",
21694 GET_MODE_NAME (from), GET_MODE_NAME (to),
21695 reg_class_names[rclass]);
21697 return ret;
21700 /* Return a string to do a move operation of 128 bits of data. */
21702 const char *
21703 rs6000_output_move_128bit (rtx operands[])
21705 rtx dest = operands[0];
21706 rtx src = operands[1];
21707 machine_mode mode = GET_MODE (dest);
21708 int dest_regno;
21709 int src_regno;
21710 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21711 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21713 if (REG_P (dest))
21715 dest_regno = REGNO (dest);
21716 dest_gpr_p = INT_REGNO_P (dest_regno);
21717 dest_fp_p = FP_REGNO_P (dest_regno);
21718 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21719 dest_vsx_p = dest_fp_p | dest_vmx_p;
21721 else
21723 dest_regno = -1;
21724 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21727 if (REG_P (src))
21729 src_regno = REGNO (src);
21730 src_gpr_p = INT_REGNO_P (src_regno);
21731 src_fp_p = FP_REGNO_P (src_regno);
21732 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21733 src_vsx_p = src_fp_p | src_vmx_p;
21735 else
21737 src_regno = -1;
21738 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21741 /* Register moves. */
21742 if (dest_regno >= 0 && src_regno >= 0)
21744 if (dest_gpr_p)
21746 if (src_gpr_p)
21747 return "#";
21749 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21750 return (WORDS_BIG_ENDIAN
21751 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21752 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21754 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21755 return "#";
21758 else if (TARGET_VSX && dest_vsx_p)
21760 if (src_vsx_p)
21761 return "xxlor %x0,%x1,%x1";
21763 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21764 return (WORDS_BIG_ENDIAN
21765 ? "mtvsrdd %x0,%1,%L1"
21766 : "mtvsrdd %x0,%L1,%1");
21768 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21769 return "#";
21772 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21773 return "vor %0,%1,%1";
21775 else if (dest_fp_p && src_fp_p)
21776 return "#";
21779 /* Loads. */
21780 else if (dest_regno >= 0 && MEM_P (src))
21782 if (dest_gpr_p)
21784 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21785 return "lq %0,%1";
21786 else
21787 return "#";
21790 else if (TARGET_ALTIVEC && dest_vmx_p
21791 && altivec_indexed_or_indirect_operand (src, mode))
21792 return "lvx %0,%y1";
21794 else if (TARGET_VSX && dest_vsx_p)
21796 if (mode_supports_vsx_dform_quad (mode)
21797 && quad_address_p (XEXP (src, 0), mode, true))
21798 return "lxv %x0,%1";
21800 else if (TARGET_P9_VECTOR)
21801 return "lxvx %x0,%y1";
21803 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21804 return "lxvw4x %x0,%y1";
21806 else
21807 return "lxvd2x %x0,%y1";
21810 else if (TARGET_ALTIVEC && dest_vmx_p)
21811 return "lvx %0,%y1";
21813 else if (dest_fp_p)
21814 return "#";
21817 /* Stores. */
21818 else if (src_regno >= 0 && MEM_P (dest))
21820 if (src_gpr_p)
21822 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21823 return "stq %1,%0";
21824 else
21825 return "#";
21828 else if (TARGET_ALTIVEC && src_vmx_p
21829 && altivec_indexed_or_indirect_operand (dest, mode))
21830 return "stvx %1,%y0";
21832 else if (TARGET_VSX && src_vsx_p)
21834 if (mode_supports_vsx_dform_quad (mode)
21835 && quad_address_p (XEXP (dest, 0), mode, true))
21836 return "stxv %x1,%0";
21838 else if (TARGET_P9_VECTOR)
21839 return "stxvx %x1,%y0";
21841 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21842 return "stxvw4x %x1,%y0";
21844 else
21845 return "stxvd2x %x1,%y0";
21848 else if (TARGET_ALTIVEC && src_vmx_p)
21849 return "stvx %1,%y0";
21851 else if (src_fp_p)
21852 return "#";
21855 /* Constants. */
21856 else if (dest_regno >= 0
21857 && (GET_CODE (src) == CONST_INT
21858 || GET_CODE (src) == CONST_WIDE_INT
21859 || GET_CODE (src) == CONST_DOUBLE
21860 || GET_CODE (src) == CONST_VECTOR))
21862 if (dest_gpr_p)
21863 return "#";
21865 else if ((dest_vmx_p && TARGET_ALTIVEC)
21866 || (dest_vsx_p && TARGET_VSX))
21867 return output_vec_const_move (operands);
21870 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
21873 /* Validate a 128-bit move. */
21874 bool
21875 rs6000_move_128bit_ok_p (rtx operands[])
21877 machine_mode mode = GET_MODE (operands[0]);
21878 return (gpc_reg_operand (operands[0], mode)
21879 || gpc_reg_operand (operands[1], mode));
21882 /* Return true if a 128-bit move needs to be split. */
21883 bool
21884 rs6000_split_128bit_ok_p (rtx operands[])
21886 if (!reload_completed)
21887 return false;
21889 if (!gpr_or_gpr_p (operands[0], operands[1]))
21890 return false;
21892 if (quad_load_store_p (operands[0], operands[1]))
21893 return false;
21895 return true;
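/* Example (sketch): a TImode GPR-to-GPR register copy is emitted as
   "#" by rs6000_output_move_128bit and split into two DImode moves
   after reload, while a GPR load satisfying quad_load_store_p uses
   the single lq instruction and is never split.  */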
21899 /* Given a comparison operation, return the bit number in CCR to test. We
21900 know this is a valid comparison.
21902 SCC_P is 1 if this is for an scc. That means that %D will have been
21903 used instead of %C, so the bits will be in different places.
21905 Return -1 if OP isn't a valid comparison for some reason. */
21907 int
21908 ccr_bit (rtx op, int scc_p)
21910 enum rtx_code code = GET_CODE (op);
21911 machine_mode cc_mode;
21912 int cc_regnum;
21913 int base_bit;
21914 rtx reg;
21916 if (!COMPARISON_P (op))
21917 return -1;
21919 reg = XEXP (op, 0);
21921 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21923 cc_mode = GET_MODE (reg);
21924 cc_regnum = REGNO (reg);
21925 base_bit = 4 * (cc_regnum - CR0_REGNO);
21927 validate_condition_mode (code, cc_mode);
21929 /* When generating a sCOND operation, only positive conditions are
21930 allowed. */
21931 gcc_assert (!scc_p
21932 || code == EQ || code == GT || code == LT || code == UNORDERED
21933 || code == GTU || code == LTU);
21935 switch (code)
21937 case NE:
21938 return scc_p ? base_bit + 3 : base_bit + 2;
21939 case EQ:
21940 return base_bit + 2;
21941 case GT: case GTU: case UNLE:
21942 return base_bit + 1;
21943 case LT: case LTU: case UNGE:
21944 return base_bit;
21945 case ORDERED: case UNORDERED:
21946 return base_bit + 3;
21948 case GE: case GEU:
21949 /* If scc, we will have done a cror to put the bit in the
21950 unordered position. So test that bit. For integer, this is ! LT
21951 unless this is an scc insn. */
21952 return scc_p ? base_bit + 3 : base_bit;
21954 case LE: case LEU:
21955 return scc_p ? base_bit + 3 : base_bit + 1;
21957 default:
21958 gcc_unreachable ();
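/* Worked example (illustrative): each CR field is four bits wide, so
   base_bit is 4 * (cc_regnum - CR0_REGNO).  A GT test against field
   cr2 therefore uses bit 4*2 + 1 = 9, and an EQ test against the same
   field uses bit 10.  */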
21962 /* Return the GOT register. */
21964 rtx
21965 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21967 /* The second flow pass currently (June 1999) can't update
21968 regs_ever_live without disturbing other parts of the compiler, so
21969 update it here to make the prolog/epilogue code happy. */
21970 if (!can_create_pseudo_p ()
21971 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21972 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21974 crtl->uses_pic_offset_table = 1;
21976 return pic_offset_table_rtx;
21979 static rs6000_stack_t stack_info;
21981 /* Function to init struct machine_function.
21982 This will be called, via a pointer variable,
21983 from push_function_context. */
21985 static struct machine_function *
21986 rs6000_init_machine_status (void)
21988 stack_info.reload_completed = 0;
21989 return ggc_cleared_alloc<machine_function> ();
21992 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21994 /* Write out a function code label. */
21996 void
21997 rs6000_output_function_entry (FILE *file, const char *fname)
21999 if (fname[0] != '.')
22001 switch (DEFAULT_ABI)
22003 default:
22004 gcc_unreachable ();
22006 case ABI_AIX:
22007 if (DOT_SYMBOLS)
22008 putc ('.', file);
22009 else
22010 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
22011 break;
22013 case ABI_ELFv2:
22014 case ABI_V4:
22015 case ABI_DARWIN:
22016 break;
22020 RS6000_OUTPUT_BASENAME (file, fname);
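/* Example output (assumed): under ABI_AIX with dot symbols, function
   "foo" is emitted as ".foo"; ELFv2, V4 and Darwin emit plain "foo";
   an FNAME that already starts with '.' skips the prefix logic
   entirely.  */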
22023 /* Print an operand. Recognize special options, documented below. */
22025 #if TARGET_ELF
22026 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
22027 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
22028 #else
22029 #define SMALL_DATA_RELOC "sda21"
22030 #define SMALL_DATA_REG 0
22031 #endif
22033 void
22034 print_operand (FILE *file, rtx x, int code)
22036 int i;
22037 unsigned HOST_WIDE_INT uval;
22039 switch (code)
22041 /* %a is output_address. */
22043 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
22044 output_operand. */
22046 case 'D':
22047 /* Like 'J' but get to the GT bit only. */
22048 gcc_assert (REG_P (x));
22050 /* Bit 1 is GT bit. */
22051 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
22053 /* Add one for shift count in rlinm for scc. */
22054 fprintf (file, "%d", i + 1);
22055 return;
22057 case 'e':
22058 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
22059 if (! INT_P (x))
22061 output_operand_lossage ("invalid %%e value");
22062 return;
22065 uval = INTVAL (x);
22066 if ((uval & 0xffff) == 0 && uval != 0)
22067 putc ('s', file);
22068 return;
22070 case 'E':
22071 /* X is a CR register. Print the number of the EQ bit of the CR */
22072 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22073 output_operand_lossage ("invalid %%E value");
22074 else
22075 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
22076 return;
22078 case 'f':
22079 /* X is a CR register. Print the shift count needed to move it
22080 to the high-order four bits. */
22081 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22082 output_operand_lossage ("invalid %%f value");
22083 else
22084 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
22085 return;
22087 case 'F':
22088 /* Similar, but print the count for the rotate in the opposite
22089 direction. */
22090 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22091 output_operand_lossage ("invalid %%F value");
22092 else
22093 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
22094 return;
22096 case 'G':
22097 /* X is a constant integer. If it is negative, print "m",
22098 otherwise print "z". This is to make an aze or ame insn. */
22099 if (GET_CODE (x) != CONST_INT)
22100 output_operand_lossage ("invalid %%G value");
22101 else if (INTVAL (x) >= 0)
22102 putc ('z', file);
22103 else
22104 putc ('m', file);
22105 return;
22107 case 'h':
22108 /* If constant, output low-order five bits. Otherwise, write
22109 normally. */
22110 if (INT_P (x))
22111 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
22112 else
22113 print_operand (file, x, 0);
22114 return;
22116 case 'H':
22117 /* If constant, output low-order six bits. Otherwise, write
22118 normally. */
22119 if (INT_P (x))
22120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
22121 else
22122 print_operand (file, x, 0);
22123 return;
22125 case 'I':
22126 /* Print `i' if this is a constant, else nothing. */
22127 if (INT_P (x))
22128 putc ('i', file);
22129 return;
22131 case 'j':
22132 /* Write the bit number in CCR for jump. */
22133 i = ccr_bit (x, 0);
22134 if (i == -1)
22135 output_operand_lossage ("invalid %%j code");
22136 else
22137 fprintf (file, "%d", i);
22138 return;
22140 case 'J':
22141 /* Similar, but add one for shift count in rlinm for scc and pass
22142 scc flag to `ccr_bit'. */
22143 i = ccr_bit (x, 1);
22144 if (i == -1)
22145 output_operand_lossage ("invalid %%J code");
22146 else
22147 /* If we want bit 31, write a shift count of zero, not 32. */
22148 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22149 return;
22151 case 'k':
22152 /* X must be a constant. Write the 1's complement of the
22153 constant. */
22154 if (! INT_P (x))
22155 output_operand_lossage ("invalid %%k value");
22156 else
22157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
22158 return;
22160 case 'K':
22161 /* X must be a symbolic constant on ELF. Write an
22162 expression suitable for an 'addi' that adds in the low 16
22163 bits of the MEM. */
22164 if (GET_CODE (x) == CONST)
22166 if (GET_CODE (XEXP (x, 0)) != PLUS
22167 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
22168 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
22169 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
22170 output_operand_lossage ("invalid %%K value");
22172 print_operand_address (file, x);
22173 fputs ("@l", file);
22174 return;
22176 /* %l is output_asm_label. */
22178 case 'L':
22179 /* Write second word of DImode or DFmode reference. Works on register
22180 or non-indexed memory only. */
22181 if (REG_P (x))
22182 fputs (reg_names[REGNO (x) + 1], file);
22183 else if (MEM_P (x))
22185 machine_mode mode = GET_MODE (x);
22186 /* Handle possible auto-increment. Since it is pre-increment and
22187 we have already done it, we can just use an offset of word. */
22188 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22189 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22190 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22191 UNITS_PER_WORD));
22192 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22193 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22194 UNITS_PER_WORD));
22195 else
22196 output_address (mode, XEXP (adjust_address_nv (x, SImode,
22197 UNITS_PER_WORD),
22198 0));
22200 if (small_data_operand (x, GET_MODE (x)))
22201 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22202 reg_names[SMALL_DATA_REG]);
22204 return;
22206 case 'N':
22207 /* Write the number of elements in the vector times 4. */
22208 if (GET_CODE (x) != PARALLEL)
22209 output_operand_lossage ("invalid %%N value");
22210 else
22211 fprintf (file, "%d", XVECLEN (x, 0) * 4);
22212 return;
22214 case 'O':
22215 /* Similar, but subtract 1 first. */
22216 if (GET_CODE (x) != PARALLEL)
22217 output_operand_lossage ("invalid %%O value");
22218 else
22219 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
22220 return;
22222 case 'p':
22223 /* X is a CONST_INT that is a power of two. Output the logarithm. */
22224 if (! INT_P (x)
22225 || INTVAL (x) < 0
22226 || (i = exact_log2 (INTVAL (x))) < 0)
22227 output_operand_lossage ("invalid %%p value");
22228 else
22229 fprintf (file, "%d", i);
22230 return;
22232 case 'P':
22233 /* The operand must be an indirect memory reference. The result
22234 is the register name. */
22235 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
22236 || REGNO (XEXP (x, 0)) >= 32)
22237 output_operand_lossage ("invalid %%P value");
22238 else
22239 fputs (reg_names[REGNO (XEXP (x, 0))], file);
22240 return;
22242 case 'q':
22243 /* This outputs the logical code corresponding to a boolean
22244 expression. The expression may have one or both operands
22245 negated (if one, only the first one). For condition register
22246 logical operations, it will also treat the negated
22247 CR codes as NOTs, but not handle NOTs of them. */
22249 const char *const *t = 0;
22250 const char *s;
22251 enum rtx_code code = GET_CODE (x);
22252 static const char * const tbl[3][3] = {
22253 { "and", "andc", "nor" },
22254 { "or", "orc", "nand" },
22255 { "xor", "eqv", "xor" } };
22257 if (code == AND)
22258 t = tbl[0];
22259 else if (code == IOR)
22260 t = tbl[1];
22261 else if (code == XOR)
22262 t = tbl[2];
22263 else
22264 output_operand_lossage ("invalid %%q value");
22266 if (GET_CODE (XEXP (x, 0)) != NOT)
22267 s = t[0];
22268 else
22270 if (GET_CODE (XEXP (x, 1)) == NOT)
22271 s = t[2];
22272 else
22273 s = t[1];
22276 fputs (s, file);
22278 return;
22280 case 'Q':
22281 if (! TARGET_MFCRF)
22282 return;
22283 fputc (',', file);
22284 /* FALLTHRU */
22286 case 'R':
22287 /* X is a CR register. Print the mask for `mtcrf'. */
22288 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22289 output_operand_lossage ("invalid %%R value");
22290 else
22291 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
22292 return;
22294 case 's':
22295 /* Low 5 bits of 32 - value */
22296 if (! INT_P (x))
22297 output_operand_lossage ("invalid %%s value");
22298 else
22299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
22300 return;
22302 case 't':
22303 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
22304 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
22306 /* Bit 3 is OV bit. */
22307 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
22309 /* If we want bit 31, write a shift count of zero, not 32. */
22310 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22311 return;
22313 case 'T':
22314 /* Print the symbolic name of a branch target register. */
22315 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
22316 && REGNO (x) != CTR_REGNO))
22317 output_operand_lossage ("invalid %%T value");
22318 else if (REGNO (x) == LR_REGNO)
22319 fputs ("lr", file);
22320 else
22321 fputs ("ctr", file);
22322 return;
22324 case 'u':
22325 /* High-order or low-order 16 bits of constant, whichever is non-zero,
22326 for use in unsigned operand. */
22327 if (! INT_P (x))
22329 output_operand_lossage ("invalid %%u value");
22330 return;
22333 uval = INTVAL (x);
22334 if ((uval & 0xffff) == 0)
22335 uval >>= 16;
22337 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
22338 return;
22340 case 'v':
22341 /* High-order 16 bits of constant for use in signed operand. */
22342 if (! INT_P (x))
22343 output_operand_lossage ("invalid %%v value");
22344 else
22345 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
22346 (INTVAL (x) >> 16) & 0xffff);
22347 return;
22349 case 'U':
22350 /* Print `u' if this has an auto-increment or auto-decrement. */
22351 if (MEM_P (x)
22352 && (GET_CODE (XEXP (x, 0)) == PRE_INC
22353 || GET_CODE (XEXP (x, 0)) == PRE_DEC
22354 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
22355 putc ('u', file);
22356 return;
22358 case 'V':
22359 /* Print the trap code for this operand. */
22360 switch (GET_CODE (x))
22362 case EQ:
22363 fputs ("eq", file); /* 4 */
22364 break;
22365 case NE:
22366 fputs ("ne", file); /* 24 */
22367 break;
22368 case LT:
22369 fputs ("lt", file); /* 16 */
22370 break;
22371 case LE:
22372 fputs ("le", file); /* 20 */
22373 break;
22374 case GT:
22375 fputs ("gt", file); /* 8 */
22376 break;
22377 case GE:
22378 fputs ("ge", file); /* 12 */
22379 break;
22380 case LTU:
22381 fputs ("llt", file); /* 2 */
22382 break;
22383 case LEU:
22384 fputs ("lle", file); /* 6 */
22385 break;
22386 case GTU:
22387 fputs ("lgt", file); /* 1 */
22388 break;
22389 case GEU:
22390 fputs ("lge", file); /* 5 */
22391 break;
22392 default:
22393 gcc_unreachable ();
22395 break;
22397 case 'w':
22398 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
22399 normally. */
22400 if (INT_P (x))
22401 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
22402 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
22403 else
22404 print_operand (file, x, 0);
22405 return;
22407 case 'x':
22408 /* X is a FPR or Altivec register used in a VSX context. */
22409 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
22410 output_operand_lossage ("invalid %%x value");
22411 else
22413 int reg = REGNO (x);
22414 int vsx_reg = (FP_REGNO_P (reg)
22415 ? reg - 32
22416 : reg - FIRST_ALTIVEC_REGNO + 32);
22418 #ifdef TARGET_REGNAMES
22419 if (TARGET_REGNAMES)
22420 fprintf (file, "%%vs%d", vsx_reg);
22421 else
22422 #endif
22423 fprintf (file, "%d", vsx_reg);
22425 return;
22427 case 'X':
22428 if (MEM_P (x)
22429 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
22430 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
22431 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
22432 putc ('x', file);
22433 return;
22435 case 'Y':
22436 /* Like 'L', for third word of TImode/PTImode */
22437 if (REG_P (x))
22438 fputs (reg_names[REGNO (x) + 2], file);
22439 else if (MEM_P (x))
22441 machine_mode mode = GET_MODE (x);
22442 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22443 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22444 output_address (mode, plus_constant (Pmode,
22445 XEXP (XEXP (x, 0), 0), 8));
22446 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22447 output_address (mode, plus_constant (Pmode,
22448 XEXP (XEXP (x, 0), 0), 8));
22449 else
22450 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
22451 if (small_data_operand (x, GET_MODE (x)))
22452 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22453 reg_names[SMALL_DATA_REG]);
22455 return;
22457 case 'z':
22458 /* X is a SYMBOL_REF. Write out the name preceded by a
22459 period and without any trailing data in brackets. Used for function
22460 names. If we are configured for System V (or the embedded ABI) on
22461 the PowerPC, do not emit the period, since those systems do not use
22462 TOCs and the like. */
22463 gcc_assert (GET_CODE (x) == SYMBOL_REF);
22465 /* For macho, check to see if we need a stub. */
22466 if (TARGET_MACHO)
22468 const char *name = XSTR (x, 0);
22469 #if TARGET_MACHO
22470 if (darwin_emit_branch_islands
22471 && MACHOPIC_INDIRECT
22472 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
22473 name = machopic_indirection_name (x, /*stub_p=*/true);
22474 #endif
22475 assemble_name (file, name);
22477 else if (!DOT_SYMBOLS)
22478 assemble_name (file, XSTR (x, 0));
22479 else
22480 rs6000_output_function_entry (file, XSTR (x, 0));
22481 return;
22483 case 'Z':
22484 /* Like 'L', for last word of TImode/PTImode. */
22485 if (REG_P (x))
22486 fputs (reg_names[REGNO (x) + 3], file);
22487 else if (MEM_P (x))
22489 machine_mode mode = GET_MODE (x);
22490 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22491 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22492 output_address (mode, plus_constant (Pmode,
22493 XEXP (XEXP (x, 0), 0), 12));
22494 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22495 output_address (mode, plus_constant (Pmode,
22496 XEXP (XEXP (x, 0), 0), 12));
22497 else
22498 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
22499 if (small_data_operand (x, GET_MODE (x)))
22500 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22501 reg_names[SMALL_DATA_REG]);
22503 return;
22505 /* Print AltiVec or SPE memory operand. */
22506 case 'y':
22508 rtx tmp;
22510 gcc_assert (MEM_P (x));
22512 tmp = XEXP (x, 0);
22514 /* Ugly hack because %y is overloaded. */
22515 if ((TARGET_SPE || TARGET_E500_DOUBLE)
22516 && (GET_MODE_SIZE (GET_MODE (x)) == 8
22517 || FLOAT128_2REG_P (GET_MODE (x))
22518 || GET_MODE (x) == TImode
22519 || GET_MODE (x) == PTImode))
22521 /* Handle [reg]. */
22522 if (REG_P (tmp))
22524 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
22525 break;
22527 /* Handle [reg+UIMM]. */
22528 else if (GET_CODE (tmp) == PLUS
22529 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
22531 int x;
22533 gcc_assert (REG_P (XEXP (tmp, 0)));
22535 x = INTVAL (XEXP (tmp, 1));
22536 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
22537 break;
22540 /* Fall through. Must be [reg+reg]. */
22542 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
22543 && GET_CODE (tmp) == AND
22544 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
22545 && INTVAL (XEXP (tmp, 1)) == -16)
22546 tmp = XEXP (tmp, 0);
22547 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
22548 && GET_CODE (tmp) == PRE_MODIFY)
22549 tmp = XEXP (tmp, 1);
22550 if (REG_P (tmp))
22551 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
22552 else
22554 if (GET_CODE (tmp) != PLUS
22555 || !REG_P (XEXP (tmp, 0))
22556 || !REG_P (XEXP (tmp, 1)))
22558 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
22559 break;
22562 if (REGNO (XEXP (tmp, 0)) == 0)
22563 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
22564 reg_names[ REGNO (XEXP (tmp, 0)) ]);
22565 else
22566 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
22567 reg_names[ REGNO (XEXP (tmp, 1)) ]);
22569 break;
22572 case 0:
22573 if (REG_P (x))
22574 fprintf (file, "%s", reg_names[REGNO (x)]);
22575 else if (MEM_P (x))
22577 /* We need to handle PRE_INC and PRE_DEC here, since we need to
22578 know the width from the mode. */
22579 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
22580 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
22581 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22582 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
22583 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
22584 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22585 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22586 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
22587 else
22588 output_address (GET_MODE (x), XEXP (x, 0));
22590 else
22592 if (toc_relative_expr_p (x, false))
22593 /* This hack along with a corresponding hack in
22594 rs6000_output_addr_const_extra arranges to output addends
22595 where the assembler expects to find them. eg.
22596 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
22597 without this hack would be output as "x@toc+4". We
22598 want "x+4@toc". */
22599 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22600 else
22601 output_addr_const (file, x);
22603 return;
22605 case '&':
22606 if (const char *name = get_some_local_dynamic_name ())
22607 assemble_name (file, name);
22608 else
22609 output_operand_lossage ("'%%&' used without any "
22610 "local dynamic TLS references");
22611 return;
22613 default:
22614 output_operand_lossage ("invalid %%xn code");
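/* Example uses of the codes above (illustrative): with operand 0 a CR
   field cr2 and operand 1 (const_int 32), "%E0" prints 10 (the EQ bit
   of cr2), "%p1" prints 5 (log2 of 32), and "%u1" prints 0x20, the
   low-order 16 bits, since they are non-zero.  */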
22618 /* Print the address of an operand. */
22620 void
22621 print_operand_address (FILE *file, rtx x)
22623 if (REG_P (x))
22624 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
22625 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
22626 || GET_CODE (x) == LABEL_REF)
22628 output_addr_const (file, x);
22629 if (small_data_operand (x, GET_MODE (x)))
22630 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22631 reg_names[SMALL_DATA_REG]);
22632 else
22633 gcc_assert (!TARGET_TOC);
22635 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22636 && REG_P (XEXP (x, 1)))
22638 if (REGNO (XEXP (x, 0)) == 0)
22639 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
22640 reg_names[ REGNO (XEXP (x, 0)) ]);
22641 else
22642 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
22643 reg_names[ REGNO (XEXP (x, 1)) ]);
22645 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22646 && GET_CODE (XEXP (x, 1)) == CONST_INT)
22647 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
22648 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
22649 #if TARGET_MACHO
22650 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22651 && CONSTANT_P (XEXP (x, 1)))
22653 fprintf (file, "lo16(");
22654 output_addr_const (file, XEXP (x, 1));
22655 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22657 #endif
22658 #if TARGET_ELF
22659 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22660 && CONSTANT_P (XEXP (x, 1)))
22662 output_addr_const (file, XEXP (x, 1));
22663 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22665 #endif
22666 else if (toc_relative_expr_p (x, false))
22668 /* This hack along with a corresponding hack in
22669 rs6000_output_addr_const_extra arranges to output addends
22670 where the assembler expects to find them. eg.
22671 (lo_sum (reg 9)
22672 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
22673 without this hack would be output as "x@toc+8@l(9)". We
22674 want "x+8@toc@l(9)". */
22675 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22676 if (GET_CODE (x) == LO_SUM)
22677 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
22678 else
22679 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
22681 else
22682 gcc_unreachable ();
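/* Illustrative outputs (assumed default register names): (reg 9)
   prints as "0(9)", (plus (reg 9) (const_int 16)) prints as "16(9)",
   and an indexed address (plus (reg 0) (reg 9)) prints as "9,0"
   because r0 in the RA slot would be read as the literal value
   zero.  */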
22685 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
22687 static bool
22688 rs6000_output_addr_const_extra (FILE *file, rtx x)
22690 if (GET_CODE (x) == UNSPEC)
22691 switch (XINT (x, 1))
22693 case UNSPEC_TOCREL:
22694 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
22695 && REG_P (XVECEXP (x, 0, 1))
22696 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
22697 output_addr_const (file, XVECEXP (x, 0, 0));
22698 if (x == tocrel_base && tocrel_offset != const0_rtx)
22700 if (INTVAL (tocrel_offset) >= 0)
22701 fprintf (file, "+");
22702 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
22704 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
22706 putc ('-', file);
22707 assemble_name (file, toc_label_name);
22708 need_toc_init = 1;
22710 else if (TARGET_ELF)
22711 fputs ("@toc", file);
22712 return true;
22714 #if TARGET_MACHO
22715 case UNSPEC_MACHOPIC_OFFSET:
22716 output_addr_const (file, XVECEXP (x, 0, 0));
22717 putc ('-', file);
22718 machopic_output_function_base_name (file);
22719 return true;
22720 #endif
22722 return false;
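/* Example (sketch): a TOC-relative reference to symbol "x" with a
   tocrel_offset of 4 prints as something like "x+4-.LCTOC1" when the
   TOC base label must be subtracted, or as "x+4@toc" on ELF with a
   full TOC; the exact label name here is illustrative.  */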
22725 /* Target hook for assembling integer objects. The PowerPC version has
22726 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
22727 is defined. It also needs to handle DI-mode objects on 64-bit
22728 targets. */
22730 static bool
22731 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
22733 #ifdef RELOCATABLE_NEEDS_FIXUP
22734 /* Special handling for SI values. */
22735 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
22737 static int recurse = 0;
22739 /* For -mrelocatable, we mark all addresses that need to be fixed up in
22740 the .fixup section. Since the TOC section is already relocated, we
22741 don't need to mark it here. We used to skip the text section, but it
22742 should never be valid for relocated addresses to be placed in the text
22743 section. */
22744 if (DEFAULT_ABI == ABI_V4
22745 && (TARGET_RELOCATABLE || flag_pic > 1)
22746 && in_section != toc_section
22747 && !recurse
22748 && !CONST_SCALAR_INT_P (x)
22749 && CONSTANT_P (x))
22751 char buf[256];
22753 recurse = 1;
22754 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
22755 fixuplabelno++;
22756 ASM_OUTPUT_LABEL (asm_out_file, buf);
22757 fprintf (asm_out_file, "\t.long\t(");
22758 output_addr_const (asm_out_file, x);
22759 fprintf (asm_out_file, ")@fixup\n");
22760 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
22761 ASM_OUTPUT_ALIGN (asm_out_file, 2);
22762 fprintf (asm_out_file, "\t.long\t");
22763 assemble_name (asm_out_file, buf);
22764 fprintf (asm_out_file, "\n\t.previous\n");
22765 recurse = 0;
22766 return true;
22768 /* Remove initial .'s to turn a -mcall-aixdesc function
22769 address into the address of the descriptor, not the function
22770 itself. */
22771 else if (GET_CODE (x) == SYMBOL_REF
22772 && XSTR (x, 0)[0] == '.'
22773 && DEFAULT_ABI == ABI_AIX)
22775 const char *name = XSTR (x, 0);
22776 while (*name == '.')
22777 name++;
22779 fprintf (asm_out_file, "\t.long\t%s\n", name);
22780 return true;
22783 #endif /* RELOCATABLE_NEEDS_FIXUP */
22784 return default_assemble_integer (x, size, aligned_p);
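/* Example .fixup entry (illustrative) for a relocatable 4-byte
   constant address X:
	.LCP0:	.long (X)@fixup
		.section ".fixup","aw"
		.long .LCP0
		.previous
   letting startup code patch the word at load time.  */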
22787 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22788 /* Emit an assembler directive to set symbol visibility for DECL to
22789 VISIBILITY_TYPE. */
22791 static void
22792 rs6000_assemble_visibility (tree decl, int vis)
22794 if (TARGET_XCOFF)
22795 return;
22797 /* Functions need to have their entry point symbol visibility set as
22798 well as their descriptor symbol visibility. */
22799 if (DEFAULT_ABI == ABI_AIX
22800 && DOT_SYMBOLS
22801 && TREE_CODE (decl) == FUNCTION_DECL)
22803 static const char * const visibility_types[] = {
22804 NULL, "protected", "hidden", "internal"
22807 const char *name, *type;
22809 name = ((* targetm.strip_name_encoding)
22810 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22811 type = visibility_types[vis];
22813 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22814 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22816 else
22817 default_assemble_visibility (decl, vis);
22819 #endif
22821 enum rtx_code
22822 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22824 /* Reversal of FP compares needs care -- an ordered compare
22825 becomes an unordered compare and vice versa. */
22826 if (mode == CCFPmode
22827 && (!flag_finite_math_only
22828 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22829 || code == UNEQ || code == LTGT))
22830 return reverse_condition_maybe_unordered (code);
22831 else
22832 return reverse_condition (code);
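/* Example (illustrative): reversing LT under CCFPmode yields UNGE, so
   NaN operands still take the reversed branch; under CCmode the plain
   reverse_condition result GE is used instead.  */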
22835 /* Generate a compare for CODE. Return a brand-new rtx that
22836 represents the result of the compare. */
22838 static rtx
22839 rs6000_generate_compare (rtx cmp, machine_mode mode)
22841 machine_mode comp_mode;
22842 rtx compare_result;
22843 enum rtx_code code = GET_CODE (cmp);
22844 rtx op0 = XEXP (cmp, 0);
22845 rtx op1 = XEXP (cmp, 1);
22847 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22848 comp_mode = CCmode;
22849 else if (FLOAT_MODE_P (mode))
22850 comp_mode = CCFPmode;
22851 else if (code == GTU || code == LTU
22852 || code == GEU || code == LEU)
22853 comp_mode = CCUNSmode;
22854 else if ((code == EQ || code == NE)
22855 && unsigned_reg_p (op0)
22856 && (unsigned_reg_p (op1)
22857 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22858 /* These are unsigned values; perhaps there will be a later
22859 ordering compare that can be shared with this one. */
22860 comp_mode = CCUNSmode;
22861 else
22862 comp_mode = CCmode;
22864 /* If we have an unsigned compare, make sure we don't have a signed value as
22865 an immediate. */
22866 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22867 && INTVAL (op1) < 0)
22869 op0 = copy_rtx_if_shared (op0);
22870 op1 = force_reg (GET_MODE (op0), op1);
22871 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22874 /* First, the compare. */
22875 compare_result = gen_reg_rtx (comp_mode);
22877 /* E500 FP compare instructions on the GPRs. Yuck! */
22878 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
22879 && FLOAT_MODE_P (mode))
22881 rtx cmp, or_result, compare_result2;
22882 machine_mode op_mode = GET_MODE (op0);
22883 bool reverse_p;
22885 if (op_mode == VOIDmode)
22886 op_mode = GET_MODE (op1);
22888 /* First reverse the condition codes that aren't directly supported. */
22889 switch (code)
22891 case NE:
22892 case UNLT:
22893 case UNLE:
22894 case UNGT:
22895 case UNGE:
22896 code = reverse_condition_maybe_unordered (code);
22897 reverse_p = true;
22898 break;
22900 case EQ:
22901 case LT:
22902 case LE:
22903 case GT:
22904 case GE:
22905 reverse_p = false;
22906 break;
22908 default:
22909 gcc_unreachable ();
22912 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
22913 This explains the following mess. */
22915 switch (code)
22917 case EQ:
22918 switch (op_mode)
22920 case SFmode:
22921 cmp = (flag_finite_math_only && !flag_trapping_math)
22922 ? gen_tstsfeq_gpr (compare_result, op0, op1)
22923 : gen_cmpsfeq_gpr (compare_result, op0, op1);
22924 break;
22926 case DFmode:
22927 cmp = (flag_finite_math_only && !flag_trapping_math)
22928 ? gen_tstdfeq_gpr (compare_result, op0, op1)
22929 : gen_cmpdfeq_gpr (compare_result, op0, op1);
22930 break;
22932 case TFmode:
22933 case IFmode:
22934 case KFmode:
22935 cmp = (flag_finite_math_only && !flag_trapping_math)
22936 ? gen_tsttfeq_gpr (compare_result, op0, op1)
22937 : gen_cmptfeq_gpr (compare_result, op0, op1);
22938 break;
22940 default:
22941 gcc_unreachable ();
22943 break;
22945 case GT:
22946 case GE:
22947 switch (op_mode)
22949 case SFmode:
22950 cmp = (flag_finite_math_only && !flag_trapping_math)
22951 ? gen_tstsfgt_gpr (compare_result, op0, op1)
22952 : gen_cmpsfgt_gpr (compare_result, op0, op1);
22953 break;
22955 case DFmode:
22956 cmp = (flag_finite_math_only && !flag_trapping_math)
22957 ? gen_tstdfgt_gpr (compare_result, op0, op1)
22958 : gen_cmpdfgt_gpr (compare_result, op0, op1);
22959 break;
22961 case TFmode:
22962 case IFmode:
22963 case KFmode:
22964 cmp = (flag_finite_math_only && !flag_trapping_math)
22965 ? gen_tsttfgt_gpr (compare_result, op0, op1)
22966 : gen_cmptfgt_gpr (compare_result, op0, op1);
22967 break;
22969 default:
22970 gcc_unreachable ();
22972 break;
22974 case LT:
22975 case LE:
22976 switch (op_mode)
22978 case SFmode:
22979 cmp = (flag_finite_math_only && !flag_trapping_math)
22980 ? gen_tstsflt_gpr (compare_result, op0, op1)
22981 : gen_cmpsflt_gpr (compare_result, op0, op1);
22982 break;
22984 case DFmode:
22985 cmp = (flag_finite_math_only && !flag_trapping_math)
22986 ? gen_tstdflt_gpr (compare_result, op0, op1)
22987 : gen_cmpdflt_gpr (compare_result, op0, op1);
22988 break;
22990 case TFmode:
22991 case IFmode:
22992 case KFmode:
22993 cmp = (flag_finite_math_only && !flag_trapping_math)
22994 ? gen_tsttflt_gpr (compare_result, op0, op1)
22995 : gen_cmptflt_gpr (compare_result, op0, op1);
22996 break;
22998 default:
22999 gcc_unreachable ();
23001 break;
23003 default:
23004 gcc_unreachable ();
23007 /* Synthesize LE and GE from LT/GT || EQ. */
23008 if (code == LE || code == GE)
23010 emit_insn (cmp);
23012 compare_result2 = gen_reg_rtx (CCFPmode);
23014 /* Do the EQ. */
23015 switch (op_mode)
23017 case SFmode:
23018 cmp = (flag_finite_math_only && !flag_trapping_math)
23019 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
23020 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
23021 break;
23023 case DFmode:
23024 cmp = (flag_finite_math_only && !flag_trapping_math)
23025 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
23026 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
23027 break;
23029 case TFmode:
23030 case IFmode:
23031 case KFmode:
23032 cmp = (flag_finite_math_only && !flag_trapping_math)
23033 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
23034 : gen_cmptfeq_gpr (compare_result2, op0, op1);
23035 break;
23037 default:
23038 gcc_unreachable ();
23041 emit_insn (cmp);
23043 /* OR them together. */
23044 or_result = gen_reg_rtx (CCFPmode);
23045 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
23046 compare_result2);
23047 compare_result = or_result;
23050 code = reverse_p ? NE : EQ;
23052 emit_insn (cmp);
23055 /* IEEE 128-bit support in VSX registers when we do not have hardware
23056 support. */
23057 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
23059 rtx libfunc = NULL_RTX;
23060 bool check_nan = false;
23061 rtx dest;
23063 switch (code)
23065 case EQ:
23066 case NE:
23067 libfunc = optab_libfunc (eq_optab, mode);
23068 break;
23070 case GT:
23071 case GE:
23072 libfunc = optab_libfunc (ge_optab, mode);
23073 break;
23075 case LT:
23076 case LE:
23077 libfunc = optab_libfunc (le_optab, mode);
23078 break;
23080 case UNORDERED:
23081 case ORDERED:
23082 libfunc = optab_libfunc (unord_optab, mode);
23083 code = (code == UNORDERED) ? NE : EQ;
23084 break;
23086 case UNGE:
23087 case UNGT:
23088 check_nan = true;
23089 libfunc = optab_libfunc (ge_optab, mode);
23090 code = (code == UNGE) ? GE : GT;
23091 break;
23093 case UNLE:
23094 case UNLT:
23095 check_nan = true;
23096 libfunc = optab_libfunc (le_optab, mode);
23097 code = (code == UNLE) ? LE : LT;
23098 break;
23100 case UNEQ:
23101 case LTGT:
23102 check_nan = true;
23103 libfunc = optab_libfunc (eq_optab, mode);
23104 code = (code == UNEQ) ? EQ : NE;
23105 break;
23107 default:
23108 gcc_unreachable ();
23111 gcc_assert (libfunc);
23113 if (!check_nan)
23114 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23115 SImode, 2, op0, mode, op1, mode);
23117 /* The library signals an exception for signalling NaNs, so we need to
23118 handle isgreater, etc. by first checking isordered. */
23119 else
23121 rtx ne_rtx, normal_dest, unord_dest;
23122 rtx unord_func = optab_libfunc (unord_optab, mode);
23123 rtx join_label = gen_label_rtx ();
23124 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
23125 rtx unord_cmp = gen_reg_rtx (comp_mode);
23128 /* Test for either value being a NaN. */
23129 gcc_assert (unord_func);
23130 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
23131 SImode, 2, op0, mode, op1,
23132 mode);
23134 /* Set value (1) if either value is a NaN, and jump to the join
23135 label. */
23136 dest = gen_reg_rtx (SImode);
23137 emit_move_insn (dest, const1_rtx);
23138 emit_insn (gen_rtx_SET (unord_cmp,
23139 gen_rtx_COMPARE (comp_mode, unord_dest,
23140 const0_rtx)));
23142 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
23143 emit_jump_insn (gen_rtx_SET (pc_rtx,
23144 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
23145 join_ref,
23146 pc_rtx)));
23148 /* Do the normal comparison, knowing that the values are not
23149 NaNs. */
23150 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23151 SImode, 2, op0, mode, op1,
23152 mode);
23154 emit_insn (gen_cstoresi4 (dest,
23155 gen_rtx_fmt_ee (code, SImode, normal_dest,
23156 const0_rtx),
23157 normal_dest, const0_rtx));
23159 /* Join NaN and non-NaN paths. Compare dest against 0. */
23160 emit_label (join_label);
23161 code = NE;
23164 emit_insn (gen_rtx_SET (compare_result,
23165 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
23168 else
23170 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
23171 CLOBBERs to match cmptf_internal2 pattern. */
23172 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
23173 && FLOAT128_IBM_P (GET_MODE (op0))
23174 && TARGET_HARD_FLOAT && TARGET_FPRS)
23175 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23176 gen_rtvec (10,
23177 gen_rtx_SET (compare_result,
23178 gen_rtx_COMPARE (comp_mode, op0, op1)),
23179 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23180 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23181 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23182 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23183 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23184 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23185 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23186 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23187 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
23188 else if (GET_CODE (op1) == UNSPEC
23189 && XINT (op1, 1) == UNSPEC_SP_TEST)
23191 rtx op1b = XVECEXP (op1, 0, 0);
23192 comp_mode = CCEQmode;
23193 compare_result = gen_reg_rtx (CCEQmode);
23194 if (TARGET_64BIT)
23195 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
23196 else
23197 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
23199 else
23200 emit_insn (gen_rtx_SET (compare_result,
23201 gen_rtx_COMPARE (comp_mode, op0, op1)));
23204 /* Some kinds of FP comparisons need an OR operation;
23205 under flag_finite_math_only we don't bother. */
23206 if (FLOAT_MODE_P (mode)
23207 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
23208 && !flag_finite_math_only
23209 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
23210 && (code == LE || code == GE
23211 || code == UNEQ || code == LTGT
23212 || code == UNGT || code == UNLT))
23214 enum rtx_code or1, or2;
23215 rtx or1_rtx, or2_rtx, compare2_rtx;
23216 rtx or_result = gen_reg_rtx (CCEQmode);
23218 switch (code)
23220 case LE: or1 = LT; or2 = EQ; break;
23221 case GE: or1 = GT; or2 = EQ; break;
23222 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
23223 case LTGT: or1 = LT; or2 = GT; break;
23224 case UNGT: or1 = UNORDERED; or2 = GT; break;
23225 case UNLT: or1 = UNORDERED; or2 = LT; break;
23226 default: gcc_unreachable ();
23228 validate_condition_mode (or1, comp_mode);
23229 validate_condition_mode (or2, comp_mode);
23230 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
23231 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
23232 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
23233 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
23234 const_true_rtx);
23235 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
23237 compare_result = or_result;
23238 code = EQ;
23241 validate_condition_mode (code, GET_MODE (compare_result));
23243 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
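/* Example (sketch): a GE compare of two DFmode values without
   flag_finite_math_only emits the CCFP compare plus the
   (ior (gt ...) (eq ...)) sequence above, and the rtx returned tests
   the combined CCEQ result against zero with EQ.  */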
23247 /* Return the diagnostic message string if the binary operation OP is
23248 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23250 static const char*
23251 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
23252 const_tree type1,
23253 const_tree type2)
23255 machine_mode mode1 = TYPE_MODE (type1);
23256 machine_mode mode2 = TYPE_MODE (type2);
23258 /* For complex modes, use the inner type. */
23259 if (COMPLEX_MODE_P (mode1))
23260 mode1 = GET_MODE_INNER (mode1);
23262 if (COMPLEX_MODE_P (mode2))
23263 mode2 = GET_MODE_INNER (mode2);
23265 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
23266 double to intermix unless -mfloat128-convert. */
23267 if (mode1 == mode2)
23268 return NULL;
23270 if (!TARGET_FLOAT128_CVT)
23272 if ((mode1 == KFmode && mode2 == IFmode)
23273 || (mode1 == IFmode && mode2 == KFmode))
23274 return N_("__float128 and __ibm128 cannot be used in the same "
23275 "expression");
23277 if (TARGET_IEEEQUAD
23278 && ((mode1 == IFmode && mode2 == TFmode)
23279 || (mode1 == TFmode && mode2 == IFmode)))
23280 return N_("__ibm128 and long double cannot be used in the same "
23281 "expression");
23283 if (!TARGET_IEEEQUAD
23284 && ((mode1 == KFmode && mode2 == TFmode)
23285 || (mode1 == TFmode && mode2 == KFmode)))
23286 return N_("__float128 and long double cannot be used in the same "
23287 "expression");
23290 return NULL;
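/* Example diagnostic (illustrative): without -mfloat128-convert,
   adding a __float128 (KFmode) value to an __ibm128 (IFmode) value is
   rejected with "__float128 and __ibm128 cannot be used in the same
   expression".  */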
23294 /* Expand floating point conversion to/from __float128 and __ibm128. */
23296 void
23297 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
23299 machine_mode dest_mode = GET_MODE (dest);
23300 machine_mode src_mode = GET_MODE (src);
23301 convert_optab cvt = unknown_optab;
23302 bool do_move = false;
23303 rtx libfunc = NULL_RTX;
23304 rtx dest2;
23305 typedef rtx (*rtx_2func_t) (rtx, rtx);
23306 rtx_2func_t hw_convert = (rtx_2func_t)0;
23307 size_t kf_or_tf;
23309 struct hw_conv_t {
23310 rtx_2func_t from_df;
23311 rtx_2func_t from_sf;
23312 rtx_2func_t from_si_sign;
23313 rtx_2func_t from_si_uns;
23314 rtx_2func_t from_di_sign;
23315 rtx_2func_t from_di_uns;
23316 rtx_2func_t to_df;
23317 rtx_2func_t to_sf;
23318 rtx_2func_t to_si_sign;
23319 rtx_2func_t to_si_uns;
23320 rtx_2func_t to_di_sign;
23321 rtx_2func_t to_di_uns;
23322 } hw_conversions[2] = {
23323 /* conversions to/from KFmode */
23325 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
23326 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
23327 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
23328 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
23329 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
23330 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
23331 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
23332 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
23333 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
23334 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
23335 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
23336 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
23339 /* conversions to/from TFmode */
23341 gen_extenddftf2_hw, /* TFmode <- DFmode. */
23342 gen_extendsftf2_hw, /* TFmode <- SFmode. */
23343 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
23344 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
23345 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
23346 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
23347 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
23348 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
23349 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
23350 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
23351 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
23352 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
23356 if (dest_mode == src_mode)
23357 gcc_unreachable ();
23359 /* Eliminate memory operations. */
23360 if (MEM_P (src))
23361 src = force_reg (src_mode, src);
23363 if (MEM_P (dest))
23365 rtx tmp = gen_reg_rtx (dest_mode);
23366 rs6000_expand_float128_convert (tmp, src, unsigned_p);
23367 rs6000_emit_move (dest, tmp, dest_mode);
23368 return;
23371 /* Convert to IEEE 128-bit floating point. */
23372 if (FLOAT128_IEEE_P (dest_mode))
23374 if (dest_mode == KFmode)
23375 kf_or_tf = 0;
23376 else if (dest_mode == TFmode)
23377 kf_or_tf = 1;
23378 else
23379 gcc_unreachable ();
23381 switch (src_mode)
23383 case DFmode:
23384 cvt = sext_optab;
23385 hw_convert = hw_conversions[kf_or_tf].from_df;
23386 break;
23388 case SFmode:
23389 cvt = sext_optab;
23390 hw_convert = hw_conversions[kf_or_tf].from_sf;
23391 break;
23393 case KFmode:
23394 case IFmode:
23395 case TFmode:
23396 if (FLOAT128_IBM_P (src_mode))
23397 cvt = sext_optab;
23398 else
23399 do_move = true;
23400 break;
23402 case SImode:
23403 if (unsigned_p)
23405 cvt = ufloat_optab;
23406 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
23408 else
23410 cvt = sfloat_optab;
23411 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
23413 break;
23415 case DImode:
23416 if (unsigned_p)
23418 cvt = ufloat_optab;
23419 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
23421 else
23423 cvt = sfloat_optab;
23424 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
23426 break;
23428 default:
23429 gcc_unreachable ();
23433 /* Convert from IEEE 128-bit floating point. */
23434 else if (FLOAT128_IEEE_P (src_mode))
23436 if (src_mode == KFmode)
23437 kf_or_tf = 0;
23438 else if (src_mode == TFmode)
23439 kf_or_tf = 1;
23440 else
23441 gcc_unreachable ();
23443 switch (dest_mode)
23445 case DFmode:
23446 cvt = trunc_optab;
23447 hw_convert = hw_conversions[kf_or_tf].to_df;
23448 break;
23450 case SFmode:
23451 cvt = trunc_optab;
23452 hw_convert = hw_conversions[kf_or_tf].to_sf;
23453 break;
23455 case KFmode:
23456 case IFmode:
23457 case TFmode:
23458 if (FLOAT128_IBM_P (dest_mode))
23459 cvt = trunc_optab;
23460 else
23461 do_move = true;
23462 break;
23464 case SImode:
23465 if (unsigned_p)
23467 cvt = ufix_optab;
23468 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
23470 else
23472 cvt = sfix_optab;
23473 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
23475 break;
23477 case DImode:
23478 if (unsigned_p)
23480 cvt = ufix_optab;
23481 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
23483 else
23485 cvt = sfix_optab;
23486 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
23488 break;
23490 default:
23491 gcc_unreachable ();
23495 /* Both IBM format. */
23496 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
23497 do_move = true;
23499 else
23500 gcc_unreachable ();
23502 /* Handle conversion between TFmode/KFmode. */
23503 if (do_move)
23504 emit_move_insn (dest, gen_lowpart (dest_mode, src));
23506 /* Handle conversion if we have hardware support. */
23507 else if (TARGET_FLOAT128_HW && hw_convert)
23508 emit_insn ((hw_convert) (dest, src));
23510 /* Call an external function to do the conversion. */
23511 else if (cvt != unknown_optab)
23513 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
23514 gcc_assert (libfunc != NULL_RTX);
23516 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
23517 src_mode);
23519 gcc_assert (dest2 != NULL_RTX);
23520 if (!rtx_equal_p (dest, dest2))
23521 emit_move_insn (dest, dest2);
23524 else
23525 gcc_unreachable ();
23527 return;
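/* Illustrative sketch, not part of the original source: for a signed
   DImode <- KFmode conversion the dispatch above reduces to roughly

	if (TARGET_FLOAT128_HW)
	  emit_insn (gen_fix_kfdi2_hw (dest, src));
	else
	  {
	    rtx f = convert_optab_libfunc (sfix_optab, DImode, KFmode);
	    emit_library_call_value (f, dest, LCT_CONST, DImode, 1,
				     src, KFmode);
	  }

   The hw_conversions table is just this choice of generator function
   precomputed for KFmode (index 0) and TFmode (index 1).  */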
23530 /* Split a conversion from __float128 to an integer type into separate insns.
23531 OPERANDS points to the destination, source, and V2DI temporary
23532 register. CODE is either FIX or UNSIGNED_FIX. */
23534 void
23535 convert_float128_to_int (rtx *operands, enum rtx_code code)
23537 rtx dest = operands[0];
23538 rtx src = operands[1];
23539 rtx tmp = operands[2];
23540 rtx cvt;
23541 rtvec cvt_vec;
23542 rtx cvt_unspec;
23543 rtvec move_vec;
23544 rtx move_unspec;
23546 if (GET_CODE (tmp) == SCRATCH)
23547 tmp = gen_reg_rtx (V2DImode);
23549 if (MEM_P (dest))
23550 dest = rs6000_address_for_fpconvert (dest);
23552 /* Generate the actual convert insn of the form:
23553 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
23554 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
23555 cvt_vec = gen_rtvec (1, cvt);
23556 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23557 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
23559 /* Generate the move insn of the form:
23560 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
23561 move_vec = gen_rtvec (1, tmp);
23562 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
23563 emit_insn (gen_rtx_SET (dest, move_unspec));
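/* Worked example (a sketch): splitting a signed KFmode -> SImode
   conversion with the helper above yields the two insns

	(set (reg:V2DI tmp) (unspec:V2DI [(fix:SI (reg:KF src))]
					 UNSPEC_IEEE128_CONVERT))
	(set (reg:SI dest) (unspec:SI [(reg:V2DI tmp)] UNSPEC_IEEE128_MOVE))

   so the convert is done while the value sits in a vector register and
   the integer result is moved out afterwards.  */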
23566 /* Split a conversion from an integer type to __float128 into separate insns.
23567 OPERANDS points to the destination, source, and V2DI temporary
23568 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
23570 void
23571 convert_int_to_float128 (rtx *operands, enum rtx_code code)
23573 rtx dest = operands[0];
23574 rtx src = operands[1];
23575 rtx tmp = operands[2];
23576 rtx cvt;
23577 rtvec cvt_vec;
23578 rtx cvt_unspec;
23579 rtvec move_vec;
23580 rtx move_unspec;
23581 rtx unsigned_flag;
23583 if (GET_CODE (tmp) == SCRATCH)
23584 tmp = gen_reg_rtx (V2DImode);
23586 if (MEM_P (src))
23587 src = rs6000_address_for_fpconvert (src);
23589 /* Generate the move of the integer into the Altivec register of the form:
23590 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
23591 (const_int 0)] UNSPEC_IEEE128_MOVE)).
23593 or:
23594 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
23596 if (GET_MODE (src) == SImode)
23598 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
23599 move_vec = gen_rtvec (2, src, unsigned_flag);
23601 else
23602 move_vec = gen_rtvec (1, src);
23604 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
23605 emit_insn (gen_rtx_SET (tmp, move_unspec));
23607 /* Generate the actual convert insn of the form:
23608 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
23609 UNSPEC_IEEE128_CONVERT))). */
23610 cvt_vec = gen_rtvec (1, tmp);
23611 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23612 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
23613 emit_insn (gen_rtx_SET (dest, cvt));
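/* Worked example (a sketch): the reverse direction for an unsigned
   SImode -> KFmode conversion produces

	(set (reg:V2DI tmp) (unspec:V2DI [(reg:SI src)
					  (const_int 1)] UNSPEC_IEEE128_MOVE))
	(set (reg:KF dest)
	     (unsigned_float:KF (unspec:DI [(reg:V2DI tmp)]
					   UNSPEC_IEEE128_CONVERT)))

   where the const_int 1 is the unsigned flag that SImode sources carry.  */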
23617 /* Emit the RTL for an sISEL pattern. */
23619 void
23620 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
23622 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
23625 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
23626 can be used as that dest register. Return the dest register. */
23629 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
23631 if (op2 == const0_rtx)
23632 return op1;
23634 if (GET_CODE (scratch) == SCRATCH)
23635 scratch = gen_reg_rtx (mode);
23637 if (logical_operand (op2, mode))
23638 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
23639 else
23640 emit_insn (gen_rtx_SET (scratch,
23641 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
23643 return scratch;
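/* Example of the helper above (illustrative): comparing r3 against a
   constant that satisfies logical_operand, say 0x100, emits roughly

	xori  scratch,r3,0x100		; scratch == 0 iff r3 == 0x100

   while a constant outside that range goes through the PLUS arm and
   emits the equivalent subtraction instead.  */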
23646 void
23647 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
23649 rtx condition_rtx;
23650 machine_mode op_mode;
23651 enum rtx_code cond_code;
23652 rtx result = operands[0];
23654 condition_rtx = rs6000_generate_compare (operands[1], mode);
23655 cond_code = GET_CODE (condition_rtx);
23657 if (FLOAT_MODE_P (mode)
23658 && !TARGET_FPRS && TARGET_HARD_FLOAT)
23660 rtx t;
23662 PUT_MODE (condition_rtx, SImode);
23663 t = XEXP (condition_rtx, 0);
23665 gcc_assert (cond_code == NE || cond_code == EQ);
23667 if (cond_code == NE)
23668 emit_insn (gen_e500_flip_gt_bit (t, t));
23670 emit_insn (gen_move_from_CR_gt_bit (result, t));
23671 return;
23674 if (cond_code == NE
23675 || cond_code == GE || cond_code == LE
23676 || cond_code == GEU || cond_code == LEU
23677 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
23679 rtx not_result = gen_reg_rtx (CCEQmode);
23680 rtx not_op, rev_cond_rtx;
23681 machine_mode cc_mode;
23683 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
23685 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
23686 SImode, XEXP (condition_rtx, 0), const0_rtx);
23687 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
23688 emit_insn (gen_rtx_SET (not_result, not_op));
23689 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
23692 op_mode = GET_MODE (XEXP (operands[1], 0));
23693 if (op_mode == VOIDmode)
23694 op_mode = GET_MODE (XEXP (operands[1], 1));
23696 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
23698 PUT_MODE (condition_rtx, DImode);
23699 convert_move (result, condition_rtx, 0);
23701 else
23703 PUT_MODE (condition_rtx, SImode);
23704 emit_insn (gen_rtx_SET (result, condition_rtx));
23708 /* Emit a conditional branch; operands[0] holds the comparison and operands[3] the target label. */
23710 void
23711 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
23713 rtx condition_rtx, loc_ref;
23715 condition_rtx = rs6000_generate_compare (operands[0], mode);
23716 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
23717 emit_jump_insn (gen_rtx_SET (pc_rtx,
23718 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
23719 loc_ref, pc_rtx)));
23722 /* Return the string to output a conditional branch to LABEL, which is
23723 the operand template of the label, or NULL if the branch is really a
23724 conditional return.
23726 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
23727 condition code register and its mode specifies what kind of
23728 comparison we made.
23730 REVERSED is nonzero if we should reverse the sense of the comparison.
23732 INSN is the insn. */
23734 char *
23735 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
23737 static char string[64];
23738 enum rtx_code code = GET_CODE (op);
23739 rtx cc_reg = XEXP (op, 0);
23740 machine_mode mode = GET_MODE (cc_reg);
23741 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
23742 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
23743 int really_reversed = reversed ^ need_longbranch;
23744 char *s = string;
23745 const char *ccode;
23746 const char *pred;
23747 rtx note;
23749 validate_condition_mode (code, mode);
23751 /* Work out which way this really branches. We could use
23752 reverse_condition_maybe_unordered here unconditionally, but only
23753 reversing when needed makes the resulting assembler clearer. */
23754 if (really_reversed)
23756 /* Reversal of FP compares needs care -- an ordered compare
23757 becomes an unordered compare and vice versa. */
23758 if (mode == CCFPmode)
23759 code = reverse_condition_maybe_unordered (code);
23760 else
23761 code = reverse_condition (code);
23764 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
23766 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
23767 to the GT bit. */
23768 switch (code)
23770 case EQ:
23771 /* Opposite of GT. */
23772 code = GT;
23773 break;
23775 case NE:
23776 code = UNLE;
23777 break;
23779 default:
23780 gcc_unreachable ();
23784 switch (code)
23786 /* Not all of these are actually distinct opcodes, but
23787 we distinguish them for clarity of the resulting assembler. */
23788 case NE: case LTGT:
23789 ccode = "ne"; break;
23790 case EQ: case UNEQ:
23791 ccode = "eq"; break;
23792 case GE: case GEU:
23793 ccode = "ge"; break;
23794 case GT: case GTU: case UNGT:
23795 ccode = "gt"; break;
23796 case LE: case LEU:
23797 ccode = "le"; break;
23798 case LT: case LTU: case UNLT:
23799 ccode = "lt"; break;
23800 case UNORDERED: ccode = "un"; break;
23801 case ORDERED: ccode = "nu"; break;
23802 case UNGE: ccode = "nl"; break;
23803 case UNLE: ccode = "ng"; break;
23804 default:
23805 gcc_unreachable ();
23808 /* Maybe we have a guess as to how likely the branch is. */
23809 pred = "";
23810 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
23811 if (note != NULL_RTX)
23813 /* PROB is the difference from 50%. */
23814 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
23816 /* Only hint for highly probable/improbable branches on newer cpus when
23817 we have real profile data, as static prediction overrides processor
23818 dynamic prediction. For older cpus we may as well always hint, but
23819 assume not taken for branches that are very close to 50% as a
23820 mispredicted taken branch is more expensive than a
23821 mispredicted not-taken branch. */
23822 if (rs6000_always_hint
23823 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
23824 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
23825 && br_prob_note_reliable_p (note)))
23827 if (abs (prob) > REG_BR_PROB_BASE / 20
23828 && ((prob > 0) ^ need_longbranch))
23829 pred = "+";
23830 else
23831 pred = "-";
23835 if (label == NULL)
23836 s += sprintf (s, "b%slr%s ", ccode, pred);
23837 else
23838 s += sprintf (s, "b%s%s ", ccode, pred);
23840 /* We need to escape any '%' characters in the reg_names string.
23841 Assume they'd only be the first character.... */
23842 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
23843 *s++ = '%';
23844 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
23846 if (label != NULL)
23848 /* If the branch distance was too far, we may have to use an
23849 unconditional branch to go the distance. */
23850 if (need_longbranch)
23851 s += sprintf (s, ",$+8\n\tb %s", label);
23852 else
23853 s += sprintf (s, ",%s", label);
23856 return string;
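/* Sample outputs of output_cbranch (illustrative; how the CR field
   prints depends on the target's reg_names):

	"beq+ 0,.L42"		EQ on cr0, short branch, predicted taken
	"bne 0,$+8\n\tb .L42"	the same branch when it is too far away:
				the condition is reversed to skip over an
				unconditional branch
	"bgelr 0"		GE as a conditional return (LABEL == NULL) */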
23859 /* Return the string to flip the GT bit on a CR. */
23860 char *
23861 output_e500_flip_gt_bit (rtx dst, rtx src)
23863 static char string[64];
23864 int a, b;
23866 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
23867 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
23869 /* GT bit. */
23870 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
23871 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
23873 sprintf (string, "crnot %d,%d", a, b);
23874 return string;
23877 /* Return insn for VSX or Altivec comparisons. */
23879 static rtx
23880 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
23882 rtx mask;
23883 machine_mode mode = GET_MODE (op0);
23885 switch (code)
23887 default:
23888 break;
23890 case GE:
23891 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23892 return NULL_RTX;
23893 /* FALLTHRU */
23895 case EQ:
23896 case GT:
23897 case GTU:
23898 case ORDERED:
23899 case UNORDERED:
23900 case UNEQ:
23901 case LTGT:
23902 mask = gen_reg_rtx (mode);
23903 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
23904 return mask;
23907 return NULL_RTX;
23910 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
23911 DMODE is expected destination mode. This is a recursive function. */
23913 static rtx
23914 rs6000_emit_vector_compare (enum rtx_code rcode,
23915 rtx op0, rtx op1,
23916 machine_mode dmode)
23918 rtx mask;
23919 bool swap_operands = false;
23920 bool try_again = false;
23922 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
23923 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
23925 /* See if the comparison works as is. */
23926 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23927 if (mask)
23928 return mask;
23930 switch (rcode)
23932 case LT:
23933 rcode = GT;
23934 swap_operands = true;
23935 try_again = true;
23936 break;
23937 case LTU:
23938 rcode = GTU;
23939 swap_operands = true;
23940 try_again = true;
23941 break;
23942 case NE:
23943 case UNLE:
23944 case UNLT:
23945 case UNGE:
23946 case UNGT:
23947 /* Invert condition and try again.
23948 e.g., A != B becomes ~(A==B). */
23950 enum rtx_code rev_code;
23951 enum insn_code nor_code;
23952 rtx mask2;
23954 rev_code = reverse_condition_maybe_unordered (rcode);
23955 if (rev_code == UNKNOWN)
23956 return NULL_RTX;
23958 nor_code = optab_handler (one_cmpl_optab, dmode);
23959 if (nor_code == CODE_FOR_nothing)
23960 return NULL_RTX;
23962 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
23963 if (!mask2)
23964 return NULL_RTX;
23966 mask = gen_reg_rtx (dmode);
23967 emit_insn (GEN_FCN (nor_code) (mask, mask2));
23968 return mask;
23970 break;
23971 case GE:
23972 case GEU:
23973 case LE:
23974 case LEU:
23975 /* Try GT/GTU/LT/LTU OR EQ */
23977 rtx c_rtx, eq_rtx;
23978 enum insn_code ior_code;
23979 enum rtx_code new_code;
23981 switch (rcode)
23983 case GE:
23984 new_code = GT;
23985 break;
23987 case GEU:
23988 new_code = GTU;
23989 break;
23991 case LE:
23992 new_code = LT;
23993 break;
23995 case LEU:
23996 new_code = LTU;
23997 break;
23999 default:
24000 gcc_unreachable ();
24003 ior_code = optab_handler (ior_optab, dmode);
24004 if (ior_code == CODE_FOR_nothing)
24005 return NULL_RTX;
24007 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
24008 if (!c_rtx)
24009 return NULL_RTX;
24011 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
24012 if (!eq_rtx)
24013 return NULL_RTX;
24015 mask = gen_reg_rtx (dmode);
24016 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
24017 return mask;
24019 break;
24020 default:
24021 return NULL_RTX;
24024 if (try_again)
24026 if (swap_operands)
24027 std::swap (op0, op1);
24029 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24030 if (mask)
24031 return mask;
24034 /* You only get two chances. */
24035 return NULL_RTX;
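/* Illustrative examples of the rewrites above (not from the source):

	LT  a,b  ->  GT  b,a			(swap operands)
	NE  a,b  ->  one_cmpl (EQ a,b)		(invert and complement)
	LE  a,b  ->  (LT a,b) IOR (EQ a,b)	(split into two compares)

   Each right-hand side goes back through rs6000_emit_vector_compare, and
   NULL_RTX is returned if no rewrite bottoms out in a compare the
   hardware supports directly.  */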
24038 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
24039 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
24040 operands for the relation operation COND. */
24043 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
24044 rtx cond, rtx cc_op0, rtx cc_op1)
24046 machine_mode dest_mode = GET_MODE (dest);
24047 machine_mode mask_mode = GET_MODE (cc_op0);
24048 enum rtx_code rcode = GET_CODE (cond);
24049 machine_mode cc_mode = CCmode;
24050 rtx mask;
24051 rtx cond2;
24052 rtx tmp;
24053 bool invert_move = false;
24055 if (VECTOR_UNIT_NONE_P (dest_mode))
24056 return 0;
24058 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
24059 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
24061 switch (rcode)
24063 /* Swap operands if we can, and fall back to doing the operation as
24064 specified, and doing a NOR to invert the test. */
24065 case NE:
24066 case UNLE:
24067 case UNLT:
24068 case UNGE:
24069 case UNGT:
24070 /* Invert condition and try again.
24071 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
24072 invert_move = true;
24073 rcode = reverse_condition_maybe_unordered (rcode);
24074 if (rcode == UNKNOWN)
24075 return 0;
24076 break;
24078 /* Mark unsigned tests with CCUNSmode. */
24079 case GTU:
24080 case GEU:
24081 case LTU:
24082 case LEU:
24083 cc_mode = CCUNSmode;
24084 break;
24086 default:
24087 break;
24090 /* Get the vector mask for the given relational operations. */
24091 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
24093 if (!mask)
24094 return 0;
24096 if (invert_move)
24098 tmp = op_true;
24099 op_true = op_false;
24100 op_false = tmp;
24103 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
24104 CONST0_RTX (dest_mode));
24105 emit_insn (gen_rtx_SET (dest,
24106 gen_rtx_IF_THEN_ELSE (dest_mode,
24107 cond2,
24108 op_true,
24109 op_false)));
24110 return 1;
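/* Example (a sketch): for V4SF operands, A = (B != C) ? D : E is emitted
   in the inverted form A = (B == C) ? E : D, i.e.

	mask = (eq:V4SF B C)
	(set A (if_then_else (ne mask 0) E D))

   which the vector patterns can implement as a compare followed by a
   vsel/xxsel-style selection on the mask.  */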
24113 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
24114 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
24115 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
24116 hardware has no such operation. */
24118 static int
24119 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24121 enum rtx_code code = GET_CODE (op);
24122 rtx op0 = XEXP (op, 0);
24123 rtx op1 = XEXP (op, 1);
24124 machine_mode compare_mode = GET_MODE (op0);
24125 machine_mode result_mode = GET_MODE (dest);
24126 bool max_p = false;
24128 if (result_mode != compare_mode)
24129 return 0;
24131 if (code == GE || code == GT)
24132 max_p = true;
24133 else if (code == LE || code == LT)
24134 max_p = false;
24135 else
24136 return 0;
24138 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
24141 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
24142 max_p = !max_p;
24144 else
24145 return 0;
24147 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
24148 return 1;
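/* Examples of the subcase above (illustrative): (a >= b ? a : b) maps
   directly to SMAX, i.e. the XSMAXCDP instruction named above, while the
   swapped-arm form (a >= b ? b : a) flips max_p and becomes SMIN
   (XSMINCDP).  Any other arm combination returns 0 and falls through to
   the general conditional-move handling.  */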
24151 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
24152 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
24153 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
24154 zero/false. Return 0 if the hardware has no such operation. */
24156 static int
24157 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24159 enum rtx_code code = GET_CODE (op);
24160 rtx op0 = XEXP (op, 0);
24161 rtx op1 = XEXP (op, 1);
24162 machine_mode result_mode = GET_MODE (dest);
24163 rtx compare_rtx;
24164 rtx cmove_rtx;
24165 rtx clobber_rtx;
24167 if (!can_create_pseudo_p ())
24168 return 0;
24170 switch (code)
24172 case EQ:
24173 case GE:
24174 case GT:
24175 break;
24177 case NE:
24178 case LT:
24179 case LE:
24180 code = swap_condition (code);
24181 std::swap (op0, op1);
24182 break;
24184 default:
24185 return 0;
24188 /* Generate: [(parallel [(set (dest)
24189 (if_then_else (op (cmp1) (cmp2))
24190 (true)
24191 (false)))
24192 (clobber (scratch))])]. */
24194 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
24195 cmove_rtx = gen_rtx_SET (dest,
24196 gen_rtx_IF_THEN_ELSE (result_mode,
24197 compare_rtx,
24198 true_cond,
24199 false_cond));
24201 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
24202 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24203 gen_rtvec (2, cmove_rtx, clobber_rtx)));
24205 return 1;
24208 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
24209 operands of the last comparison is nonzero/true, FALSE_COND if it
24210 is zero/false. Return 0 if the hardware has no such operation. */
24213 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24215 enum rtx_code code = GET_CODE (op);
24216 rtx op0 = XEXP (op, 0);
24217 rtx op1 = XEXP (op, 1);
24218 machine_mode compare_mode = GET_MODE (op0);
24219 machine_mode result_mode = GET_MODE (dest);
24220 rtx temp;
24221 bool is_against_zero;
24223 /* These modes should always match. */
24224 if (GET_MODE (op1) != compare_mode
24225 /* In the isel case however, we can use a compare immediate, so
24226 op1 may be a small constant. */
24227 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
24228 return 0;
24229 if (GET_MODE (true_cond) != result_mode)
24230 return 0;
24231 if (GET_MODE (false_cond) != result_mode)
24232 return 0;
24234 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
24235 if (TARGET_P9_MINMAX
24236 && (compare_mode == SFmode || compare_mode == DFmode)
24237 && (result_mode == SFmode || result_mode == DFmode))
24239 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
24240 return 1;
24242 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
24243 return 1;
24246 /* Don't allow using floating point comparisons for integer results for
24247 now. */
24248 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
24249 return 0;
24251 /* First, work out if the hardware can do this at all, or
24252 if it's too slow.... */
24253 if (!FLOAT_MODE_P (compare_mode))
24255 if (TARGET_ISEL)
24256 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
24257 return 0;
24259 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
24260 && SCALAR_FLOAT_MODE_P (compare_mode))
24261 return 0;
24263 is_against_zero = op1 == CONST0_RTX (compare_mode);
24265 /* A floating-point subtract might overflow, underflow, or produce
24266 an inexact result, thus changing the floating-point flags, so it
24267 can't be generated if we care about that. It's safe if one side
24268 of the construct is zero, since then no subtract will be
24269 generated. */
24270 if (SCALAR_FLOAT_MODE_P (compare_mode)
24271 && flag_trapping_math && ! is_against_zero)
24272 return 0;
24274 /* Eliminate half of the comparisons by switching operands; this
24275 makes the remaining code simpler. */
24276 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
24277 || code == LTGT || code == LT || code == UNLE)
24279 code = reverse_condition_maybe_unordered (code);
24280 temp = true_cond;
24281 true_cond = false_cond;
24282 false_cond = temp;
24285 /* UNEQ and LTGT take four instructions for a comparison with zero,
24286 so it'll probably be faster to use a branch here too. */
24287 if (code == UNEQ && HONOR_NANS (compare_mode))
24288 return 0;
24290 /* We're going to try to implement comparisons by performing
24291 a subtract, then comparing against zero. Unfortunately,
24292 Inf - Inf is NaN which is not zero, and so if we don't
24293 know that the operand is finite and the comparison
24294 would treat EQ differently from UNORDERED, we can't do it. */
24295 if (HONOR_INFINITIES (compare_mode)
24296 && code != GT && code != UNGE
24297 && (GET_CODE (op1) != CONST_DOUBLE
24298 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
24299 /* Constructs of the form (a OP b ? a : b) are safe. */
24300 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
24301 || (! rtx_equal_p (op0, true_cond)
24302 && ! rtx_equal_p (op1, true_cond))))
24303 return 0;
24305 /* At this point we know we can use fsel. */
24307 /* Reduce the comparison to a comparison against zero. */
24308 if (! is_against_zero)
24310 temp = gen_reg_rtx (compare_mode);
24311 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
24312 op0 = temp;
24313 op1 = CONST0_RTX (compare_mode);
24316 /* If we don't care about NaNs we can reduce some of the comparisons
24317 down to faster ones. */
24318 if (! HONOR_NANS (compare_mode))
24319 switch (code)
24321 case GT:
24322 code = LE;
24323 temp = true_cond;
24324 true_cond = false_cond;
24325 false_cond = temp;
24326 break;
24327 case UNGE:
24328 code = GE;
24329 break;
24330 case UNEQ:
24331 code = EQ;
24332 break;
24333 default:
24334 break;
24337 /* Now, reduce everything down to a GE. */
24338 switch (code)
24340 case GE:
24341 break;
24343 case LE:
24344 temp = gen_reg_rtx (compare_mode);
24345 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24346 op0 = temp;
24347 break;
24349 case ORDERED:
24350 temp = gen_reg_rtx (compare_mode);
24351 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
24352 op0 = temp;
24353 break;
24355 case EQ:
24356 temp = gen_reg_rtx (compare_mode);
24357 emit_insn (gen_rtx_SET (temp,
24358 gen_rtx_NEG (compare_mode,
24359 gen_rtx_ABS (compare_mode, op0))));
24360 op0 = temp;
24361 break;
24363 case UNGE:
24364 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
24365 temp = gen_reg_rtx (result_mode);
24366 emit_insn (gen_rtx_SET (temp,
24367 gen_rtx_IF_THEN_ELSE (result_mode,
24368 gen_rtx_GE (VOIDmode,
24369 op0, op1),
24370 true_cond, false_cond)));
24371 false_cond = true_cond;
24372 true_cond = temp;
24374 temp = gen_reg_rtx (compare_mode);
24375 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24376 op0 = temp;
24377 break;
24379 case GT:
24380 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
24381 temp = gen_reg_rtx (result_mode);
24382 emit_insn (gen_rtx_SET (temp,
24383 gen_rtx_IF_THEN_ELSE (result_mode,
24384 gen_rtx_GE (VOIDmode,
24385 op0, op1),
24386 true_cond, false_cond)));
24387 true_cond = false_cond;
24388 false_cond = temp;
24390 temp = gen_reg_rtx (compare_mode);
24391 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24392 op0 = temp;
24393 break;
24395 default:
24396 gcc_unreachable ();
24399 emit_insn (gen_rtx_SET (dest,
24400 gen_rtx_IF_THEN_ELSE (result_mode,
24401 gen_rtx_GE (VOIDmode,
24402 op0, op1),
24403 true_cond, false_cond)));
24404 return 1;
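/* Worked fsel example (a sketch, not from the source): for
   d = (a >= b) ? x : y in DFmode, the code above first reduces the
   compare to a comparison against zero and then emits

	temp = a - b
	(set d (if_then_else (ge temp 0) x y))

   matching the fsel semantics d = (temp >= 0.0) ? x : y.  The GT and
   UNGE cases build this form twice to express the AND/OR of two GE
   tests noted in the comments above.  */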
24407 /* Same as above, but for ints (isel). */
24409 static int
24410 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24412 rtx condition_rtx, cr;
24413 machine_mode mode = GET_MODE (dest);
24414 enum rtx_code cond_code;
24415 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
24416 bool signedp;
24418 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
24419 return 0;
24421 /* We still have to do the compare, because isel doesn't do a
24422 compare; it just looks at the CRx bits set by a previous compare
24423 instruction. */
24424 condition_rtx = rs6000_generate_compare (op, mode);
24425 cond_code = GET_CODE (condition_rtx);
24426 cr = XEXP (condition_rtx, 0);
24427 signedp = GET_MODE (cr) == CCmode;
24429 isel_func = (mode == SImode
24430 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
24431 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
24433 switch (cond_code)
24435 case LT: case GT: case LTU: case GTU: case EQ:
24436 /* isel handles these directly. */
24437 break;
24439 default:
24440 /* We need to swap the sense of the comparison. */
24442 std::swap (false_cond, true_cond);
24443 PUT_CODE (condition_rtx, reverse_condition (cond_code));
24445 break;
24448 false_cond = force_reg (mode, false_cond);
24449 if (true_cond != const0_rtx)
24450 true_cond = force_reg (mode, true_cond);
24452 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
24454 return 1;
24457 const char *
24458 output_isel (rtx *operands)
24460 enum rtx_code code;
24462 code = GET_CODE (operands[1]);
24464 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
24466 gcc_assert (GET_CODE (operands[2]) == REG
24467 && GET_CODE (operands[3]) == REG);
24468 PUT_CODE (operands[1], reverse_condition (code));
24469 return "isel %0,%3,%2,%j1";
24472 return "isel %0,%2,%3,%j1";
24475 void
24476 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
24478 machine_mode mode = GET_MODE (op0);
24479 enum rtx_code c;
24480 rtx target;
24482 /* VSX/altivec have direct min/max insns. */
24483 if ((code == SMAX || code == SMIN)
24484 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
24485 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
24487 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
24488 return;
24491 if (code == SMAX || code == SMIN)
24492 c = GE;
24493 else
24494 c = GEU;
24496 if (code == SMAX || code == UMAX)
24497 target = emit_conditional_move (dest, c, op0, op1, mode,
24498 op0, op1, mode, 0);
24499 else
24500 target = emit_conditional_move (dest, c, op0, op1, mode,
24501 op1, op0, mode, 0);
24502 gcc_assert (target);
24503 if (target != dest)
24504 emit_move_insn (dest, target);
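/* Example (illustrative): without a direct min/max insn, SMIN of
   op0/op1 becomes emit_conditional_move (dest, GE, op0, op1, mode,
   op1, op0, mode, 0), i.e. dest = (op0 >= op1) ? op1 : op0, which the
   conditional-move machinery above then implements via fsel or isel.  */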
24507 /* Split a signbit operation on 64-bit machines with direct move. Also allow
24508 the value to come from memory, or to be already loaded into a GPR. */
24510 void
24511 rs6000_split_signbit (rtx dest, rtx src)
24513 machine_mode d_mode = GET_MODE (dest);
24514 machine_mode s_mode = GET_MODE (src);
24515 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
24516 rtx shift_reg = dest_di;
24518 gcc_assert (REG_P (dest));
24519 gcc_assert (REG_P (src) || MEM_P (src));
24520 gcc_assert (s_mode == KFmode || s_mode == TFmode);
24522 if (MEM_P (src))
24524 rtx mem = (WORDS_BIG_ENDIAN
24525 ? adjust_address (src, DImode, 0)
24526 : adjust_address (src, DImode, 8));
24527 emit_insn (gen_rtx_SET (dest_di, mem));
24530 else
24532 unsigned int r = REGNO (src);
24534 /* If this is a VSX register, generate the special mfvsrd instruction
24535 to get it in a GPR. Until we support SF and DF modes, that will
24536 always be true. */
24537 gcc_assert (VSX_REGNO_P (r));
24539 if (s_mode == KFmode)
24540 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
24541 else
24542 emit_insn (gen_signbittf2_dm2 (dest_di, src));
24545 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
24546 return;
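/* Sketch of the sequence this split produces (illustrative): for a
   KFmode value already in a VSX register,

	signbitkf2_dm2	dest,src	; mfvsrd-class direct move of the
					; doubleword holding the sign bit
	srdi		dest,dest,63	; shift the sign down to bit 0

   leaving 0 or 1 in DEST.  */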
24549 /* A subroutine of the atomic operation splitters. Jump to LABEL if
24550 COND is true. Mark the jump as unlikely to be taken. */
24552 static void
24553 emit_unlikely_jump (rtx cond, rtx label)
24555 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
24556 rtx x;
24558 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
24559 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
24560 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
24563 /* A subroutine of the atomic operation splitters. Emit a load-locked
24564 instruction in MODE. For QI/HImode, possibly use a pattern that includes
24565 the zero_extend operation. */
24567 static void
24568 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
24570 rtx (*fn) (rtx, rtx) = NULL;
24572 switch (mode)
24574 case QImode:
24575 fn = gen_load_lockedqi;
24576 break;
24577 case HImode:
24578 fn = gen_load_lockedhi;
24579 break;
24580 case SImode:
24581 if (GET_MODE (mem) == QImode)
24582 fn = gen_load_lockedqi_si;
24583 else if (GET_MODE (mem) == HImode)
24584 fn = gen_load_lockedhi_si;
24585 else
24586 fn = gen_load_lockedsi;
24587 break;
24588 case DImode:
24589 fn = gen_load_lockeddi;
24590 break;
24591 case TImode:
24592 fn = gen_load_lockedti;
24593 break;
24594 default:
24595 gcc_unreachable ();
24597 emit_insn (fn (reg, mem));
24600 /* A subroutine of the atomic operation splitters. Emit a store-conditional
24601 instruction in MODE. */
24603 static void
24604 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
24606 rtx (*fn) (rtx, rtx, rtx) = NULL;
24608 switch (mode)
24610 case QImode:
24611 fn = gen_store_conditionalqi;
24612 break;
24613 case HImode:
24614 fn = gen_store_conditionalhi;
24615 break;
24616 case SImode:
24617 fn = gen_store_conditionalsi;
24618 break;
24619 case DImode:
24620 fn = gen_store_conditionaldi;
24621 break;
24622 case TImode:
24623 fn = gen_store_conditionalti;
24624 break;
24625 default:
24626 gcc_unreachable ();
24629 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
24630 if (PPC405_ERRATUM77)
24631 emit_insn (gen_hwsync ());
24633 emit_insn (fn (res, mem, val));
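/* Taken together, these two helpers pair the PowerPC load-reserve and
   store-conditional instructions: load_locked expands to
   lbarx/lharx/lwarx/ldarx/lqarx for QI/HI/SI/DI/TImode, and
   store_conditional to stbcx./sthcx./stwcx./stdcx./stqcx., whose
   success/failure lands in CR0.EQ for the atomic loops below to test.  */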
24636 /* Expand barriers before and after a load_locked/store_cond sequence. */
24638 static rtx
24639 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
24641 rtx addr = XEXP (mem, 0);
24642 int strict_p = (reload_in_progress || reload_completed);
24644 if (!legitimate_indirect_address_p (addr, strict_p)
24645 && !legitimate_indexed_address_p (addr, strict_p))
24647 addr = force_reg (Pmode, addr);
24648 mem = replace_equiv_address_nv (mem, addr);
24651 switch (model)
24653 case MEMMODEL_RELAXED:
24654 case MEMMODEL_CONSUME:
24655 case MEMMODEL_ACQUIRE:
24656 break;
24657 case MEMMODEL_RELEASE:
24658 case MEMMODEL_ACQ_REL:
24659 emit_insn (gen_lwsync ());
24660 break;
24661 case MEMMODEL_SEQ_CST:
24662 emit_insn (gen_hwsync ());
24663 break;
24664 default:
24665 gcc_unreachable ();
24667 return mem;
24670 static void
24671 rs6000_post_atomic_barrier (enum memmodel model)
24673 switch (model)
24675 case MEMMODEL_RELAXED:
24676 case MEMMODEL_CONSUME:
24677 case MEMMODEL_RELEASE:
24678 break;
24679 case MEMMODEL_ACQUIRE:
24680 case MEMMODEL_ACQ_REL:
24681 case MEMMODEL_SEQ_CST:
24682 emit_insn (gen_isync ());
24683 break;
24684 default:
24685 gcc_unreachable ();
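/* Summary of the barrier discipline in the two functions above: nothing
   before relaxed/consume/acquire, lwsync before release/acq_rel, hwsync
   before seq_cst; afterwards, an isync for acquire/acq_rel/seq_cst.  A
   seq_cst read-modify-write therefore becomes, roughly,

	hwsync ; .L: larx ... stcx. ... bne- .L ; isync  */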
24689 /* A subroutine of the various atomic expanders. For sub-word operations,
24690 we must adjust things to operate on SImode. Given the original MEM,
24691 return a new aligned memory. Also build and return the quantities by
24692 which to shift and mask. */
24694 static rtx
24695 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
24697 rtx addr, align, shift, mask, mem;
24698 HOST_WIDE_INT shift_mask;
24699 machine_mode mode = GET_MODE (orig_mem);
24701 /* For smaller modes, we have to implement this via SImode. */
24702 shift_mask = (mode == QImode ? 0x18 : 0x10);
24704 addr = XEXP (orig_mem, 0);
24705 addr = force_reg (GET_MODE (addr), addr);
24707 /* Aligned memory containing subword. Generate a new memory. We
24708 do not want any of the existing MEM_ATTR data, as we're now
24709 accessing memory outside the original object. */
24710 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
24711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24712 mem = gen_rtx_MEM (SImode, align);
24713 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
24714 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
24715 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
24717 /* Shift amount for subword relative to aligned word. */
24718 shift = gen_reg_rtx (SImode);
24719 addr = gen_lowpart (SImode, addr);
24720 rtx tmp = gen_reg_rtx (SImode);
24721 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
24722 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
24723 if (BYTES_BIG_ENDIAN)
24724 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
24725 shift, 1, OPTAB_LIB_WIDEN);
24726 *pshift = shift;
24728 /* Mask for insertion. */
24729 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
24730 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
24731 *pmask = mask;
24733 return mem;
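/* Worked example (illustrative): a QImode atomic at address A with
   A % 4 == 1.  The containing word is at A & -4; shift is
   (A << 3) & 0x18 = 8, flipped to 8 ^ 0x18 = 16 on big-endian, where
   that byte occupies bits 16..23 of the word; mask is 0xff << shift.
   The callers then run their larx/stcx. loop on the SImode word and use
   SHIFT/MASK to insert and extract the byte.  */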
24736 /* A subroutine of the various atomic expanders. For sub-word operands,
24737 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
24739 static rtx
24740 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
24742 rtx x;
24744 x = gen_reg_rtx (SImode);
24745 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
24746 gen_rtx_NOT (SImode, mask),
24747 oldval)));
24749 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
24751 return x;
24754 /* A subroutine of the various atomic expanders. For sub-word operands,
24755 extract WIDE to NARROW via SHIFT. */
24757 static void
24758 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
24760 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
24761 wide, 1, OPTAB_LIB_WIDEN);
24762 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
24765 /* Expand an atomic compare and swap operation. */
24767 void
24768 rs6000_expand_atomic_compare_and_swap (rtx operands[])
24770 rtx boolval, retval, mem, oldval, newval, cond;
24771 rtx label1, label2, x, mask, shift;
24772 machine_mode mode, orig_mode;
24773 enum memmodel mod_s, mod_f;
24774 bool is_weak;
24776 boolval = operands[0];
24777 retval = operands[1];
24778 mem = operands[2];
24779 oldval = operands[3];
24780 newval = operands[4];
24781 is_weak = (INTVAL (operands[5]) != 0);
24782 mod_s = memmodel_base (INTVAL (operands[6]));
24783 mod_f = memmodel_base (INTVAL (operands[7]));
24784 orig_mode = mode = GET_MODE (mem);
24786 mask = shift = NULL_RTX;
24787 if (mode == QImode || mode == HImode)
24789 /* Before power8, we didn't have access to lbarx/lharx, so generate a
24790 lwarx and shift/mask operations. With power8, we need to do the
24791 comparison in SImode, but the store is still done in QI/HImode. */
24792 oldval = convert_modes (SImode, mode, oldval, 1);
24794 if (!TARGET_SYNC_HI_QI)
24796 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24798 /* Shift and mask OLDVAL into position within the word. */
24799 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
24800 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24802 /* Shift and mask NEWVAL into position within the word. */
24803 newval = convert_modes (SImode, mode, newval, 1);
24804 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
24805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24808 /* Prepare to adjust the return value. */
24809 retval = gen_reg_rtx (SImode);
24810 mode = SImode;
24812 else if (reg_overlap_mentioned_p (retval, oldval))
24813 oldval = copy_to_reg (oldval);
24815 if (mode != TImode && !reg_or_short_operand (oldval, mode))
24816 oldval = copy_to_mode_reg (mode, oldval);
24818 if (reg_overlap_mentioned_p (retval, newval))
24819 newval = copy_to_reg (newval);
24821 mem = rs6000_pre_atomic_barrier (mem, mod_s);
24823 label1 = NULL_RTX;
24824 if (!is_weak)
24826 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24827 emit_label (XEXP (label1, 0));
24829 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24831 emit_load_locked (mode, retval, mem);
24833 x = retval;
24834 if (mask)
24835 x = expand_simple_binop (SImode, AND, retval, mask,
24836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24838 cond = gen_reg_rtx (CCmode);
24839 /* If we have TImode, synthesize a comparison. */
24840 if (mode != TImode)
24841 x = gen_rtx_COMPARE (CCmode, x, oldval);
24842 else
24844 rtx xor1_result = gen_reg_rtx (DImode);
24845 rtx xor2_result = gen_reg_rtx (DImode);
24846 rtx or_result = gen_reg_rtx (DImode);
24847 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
24848 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
24849 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
24850 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
24852 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
24853 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
24854 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
24855 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
24858 emit_insn (gen_rtx_SET (cond, x));
24860 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24861 emit_unlikely_jump (x, label2);
24863 x = newval;
24864 if (mask)
24865 x = rs6000_mask_atomic_subword (retval, newval, mask);
24867 emit_store_conditional (orig_mode, cond, mem, x);
24869 if (!is_weak)
24871 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24872 emit_unlikely_jump (x, label1);
24875 if (!is_mm_relaxed (mod_f))
24876 emit_label (XEXP (label2, 0));
24878 rs6000_post_atomic_barrier (mod_s);
24880 if (is_mm_relaxed (mod_f))
24881 emit_label (XEXP (label2, 0));
24883 if (shift)
24884 rs6000_finish_atomic_subword (operands[1], retval, shift);
24885 else if (mode != GET_MODE (operands[1]))
24886 convert_move (operands[1], retval, 1);
24888 /* In all cases, CR0 contains EQ on success, and NE on failure. */
24889 x = gen_rtx_EQ (SImode, cond, const0_rtx);
24890 emit_insn (gen_rtx_SET (boolval, x));
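/* Approximate shape of the strong (!is_weak) SImode loop emitted above,
   with the memory-model barriers omitted (a sketch, labels invented):

     .L1:	lwarx   r,0,mem
		cmpw    cr0,r,oldval
		bne-    cr0,.L2
		stwcx.  newval,0,mem
		bne-    cr0,.L1
     .L2:				; CR0.EQ set on success  */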
24893 /* Expand an atomic exchange operation. */
24895 void
24896 rs6000_expand_atomic_exchange (rtx operands[])
24898 rtx retval, mem, val, cond;
24899 machine_mode mode;
24900 enum memmodel model;
24901 rtx label, x, mask, shift;
24903 retval = operands[0];
24904 mem = operands[1];
24905 val = operands[2];
24906 model = memmodel_base (INTVAL (operands[3]));
24907 mode = GET_MODE (mem);
24909 mask = shift = NULL_RTX;
24910 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
24912 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24914 /* Shift and mask VAL into position within the word. */
24915 val = convert_modes (SImode, mode, val, 1);
24916 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24919 /* Prepare to adjust the return value. */
24920 retval = gen_reg_rtx (SImode);
24921 mode = SImode;
24924 mem = rs6000_pre_atomic_barrier (mem, model);
24926 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24927 emit_label (XEXP (label, 0));
24929 emit_load_locked (mode, retval, mem);
24931 x = val;
24932 if (mask)
24933 x = rs6000_mask_atomic_subword (retval, val, mask);
24935 cond = gen_reg_rtx (CCmode);
24936 emit_store_conditional (mode, cond, mem, x);
24938 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24939 emit_unlikely_jump (x, label);
24941 rs6000_post_atomic_barrier (model);
24943 if (shift)
24944 rs6000_finish_atomic_subword (operands[0], retval, shift);
24947 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
24948 to perform. MEM is the memory on which to operate. VAL is the second
24949 operand of the binary operator. BEFORE and AFTER are optional locations to
24950 return the value of MEM either before or after the operation. MODEL_RTX
24951 is a CONST_INT containing the memory model to use. */
24953 void
24954 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
24955 rtx orig_before, rtx orig_after, rtx model_rtx)
24957 enum memmodel model = memmodel_base (INTVAL (model_rtx));
24958 machine_mode mode = GET_MODE (mem);
24959 machine_mode store_mode = mode;
24960 rtx label, x, cond, mask, shift;
24961 rtx before = orig_before, after = orig_after;
24963 mask = shift = NULL_RTX;
24964 /* On power8, we want to use SImode for the operation. On previous systems,
24965 do the operation on the containing SImode word and shift/mask to get the
24966 proper byte or halfword. */
24967 if (mode == QImode || mode == HImode)
24969 if (TARGET_SYNC_HI_QI)
24971 val = convert_modes (SImode, mode, val, 1);
24973 /* Prepare to adjust the return value. */
24974 before = gen_reg_rtx (SImode);
24975 if (after)
24976 after = gen_reg_rtx (SImode);
24977 mode = SImode;
24979 else
24981 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24983 /* Shift and mask VAL into position within the word. */
24984 val = convert_modes (SImode, mode, val, 1);
24985 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24986 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24988 switch (code)
24990 case IOR:
24991 case XOR:
24992 /* We've already zero-extended VAL. That is sufficient to
24993 make certain that it does not affect other bits. */
24994 mask = NULL;
24995 break;
24997 case AND:
24998 /* If we make certain that all of the other bits in VAL are
24999 set, that will be sufficient to not affect other bits. */
25000 x = gen_rtx_NOT (SImode, mask);
25001 x = gen_rtx_IOR (SImode, x, val);
25002 emit_insn (gen_rtx_SET (val, x));
25003 mask = NULL;
25004 break;
25006 case NOT:
25007 case PLUS:
25008 case MINUS:
25009 /* These will all affect bits outside the field and need
25010 adjustment via MASK within the loop. */
25011 break;
25013 default:
25014 gcc_unreachable ();
25017 /* Prepare to adjust the return value. */
25018 before = gen_reg_rtx (SImode);
25019 if (after)
25020 after = gen_reg_rtx (SImode);
25021 store_mode = mode = SImode;
25025 mem = rs6000_pre_atomic_barrier (mem, model);
25027 label = gen_label_rtx ();
25028 emit_label (label);
25029 label = gen_rtx_LABEL_REF (VOIDmode, label);
25031 if (before == NULL_RTX)
25032 before = gen_reg_rtx (mode);
25034 emit_load_locked (mode, before, mem);
25036 if (code == NOT)
25038 x = expand_simple_binop (mode, AND, before, val,
25039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25040 after = expand_simple_unop (mode, NOT, x, after, 1);
25042 else
25044 after = expand_simple_binop (mode, code, before, val,
25045 after, 1, OPTAB_LIB_WIDEN);
25048 x = after;
25049 if (mask)
25051 x = expand_simple_binop (SImode, AND, after, mask,
25052 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25053 x = rs6000_mask_atomic_subword (before, x, mask);
25055 else if (store_mode != mode)
25056 x = convert_modes (store_mode, mode, x, 1);
25058 cond = gen_reg_rtx (CCmode);
25059 emit_store_conditional (store_mode, cond, mem, x);
25061 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25062 emit_unlikely_jump (x, label);
25064 rs6000_post_atomic_barrier (model);
25066 if (shift)
25068 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
25069 then do the calculations in an SImode register. */
25070 if (orig_before)
25071 rs6000_finish_atomic_subword (orig_before, before, shift);
25072 if (orig_after)
25073 rs6000_finish_atomic_subword (orig_after, after, shift);
25075 else if (store_mode != mode)
25077 /* QImode/HImode on machines with lbarx/lharx where we do the native
25078 operation and then do the calculations in an SImode register. */
25079 if (orig_before)
25080 convert_move (orig_before, before, 1);
25081 if (orig_after)
25082 convert_move (orig_after, after, 1);
25084 else if (orig_after && after != orig_after)
25085 emit_move_insn (orig_after, after);
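/* Example (illustrative): a fetch-and-nand reaches the code == NOT arm
   above, so each iteration of the larx/stcx. loop computes
   after = ~(before & val) -- first the AND, then the complement --
   before attempting the store-conditional, which is the semantics
   __atomic_fetch_nand requires.  */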
25088 /* Emit instructions to move SRC to DST. Called by splitters for
25089 multi-register moves. It will emit at most one instruction for
25090 each register that is accessed; that is, it won't emit li/lis pairs
25091 (or equivalent for 64-bit code). One of SRC or DST must be a hard
25092 register. */
25094 void
25095 rs6000_split_multireg_move (rtx dst, rtx src)
25097 /* The register number of the first register being moved. */
25098 int reg;
25099 /* The mode that is to be moved. */
25100 machine_mode mode;
25101 /* The mode that the move is being done in, and its size. */
25102 machine_mode reg_mode;
25103 int reg_mode_size;
25104 /* The number of registers that will be moved. */
25105 int nregs;
25107 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
25108 mode = GET_MODE (dst);
25109 nregs = hard_regno_nregs[reg][mode];
25110 if (FP_REGNO_P (reg))
25111 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
25112 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
25113 else if (ALTIVEC_REGNO_P (reg))
25114 reg_mode = V16QImode;
25115 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
25116 reg_mode = DFmode;
25117 else
25118 reg_mode = word_mode;
25119 reg_mode_size = GET_MODE_SIZE (reg_mode);
25121 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
25123 /* TDmode residing in FP registers is special, since the ISA requires that
25124 the lower-numbered word of a register pair is always the most significant
25125 word, even in little-endian mode. This does not match the usual subreg
25126 semantics, so we cannot use simplify_gen_subreg in those cases. Access
25127 the appropriate constituent registers "by hand" in little-endian mode.
25129 Note we do not need to check for destructive overlap here since TDmode
25130 can only reside in even/odd register pairs. */
25131 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
25133 rtx p_src, p_dst;
25134 int i;
25136 for (i = 0; i < nregs; i++)
25138 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
25139 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
25140 else
25141 p_src = simplify_gen_subreg (reg_mode, src, mode,
25142 i * reg_mode_size);
25144 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
25145 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
25146 else
25147 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
25148 i * reg_mode_size);
25150 emit_insn (gen_rtx_SET (p_dst, p_src));
25153 return;
25156 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
25158 /* Move register range backwards, if we might have destructive
25159 overlap. */
25160 int i;
25161 for (i = nregs - 1; i >= 0; i--)
25162 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25163 i * reg_mode_size),
25164 simplify_gen_subreg (reg_mode, src, mode,
25165 i * reg_mode_size)));
25167 else
25169 int i;
25170 int j = -1;
25171 bool used_update = false;
25172 rtx restore_basereg = NULL_RTX;
25174 if (MEM_P (src) && INT_REGNO_P (reg))
25176 rtx breg;
25178 if (GET_CODE (XEXP (src, 0)) == PRE_INC
25179 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
25181 rtx delta_rtx;
25182 breg = XEXP (XEXP (src, 0), 0);
25183 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
25184 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
25185 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
25186 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25187 src = replace_equiv_address (src, breg);
25189 else if (! rs6000_offsettable_memref_p (src, reg_mode))
25191 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
25193 rtx basereg = XEXP (XEXP (src, 0), 0);
25194 if (TARGET_UPDATE)
25196 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
25197 emit_insn (gen_rtx_SET (ndst,
25198 gen_rtx_MEM (reg_mode,
25199 XEXP (src, 0))));
25200 used_update = true;
25202 else
25203 emit_insn (gen_rtx_SET (basereg,
25204 XEXP (XEXP (src, 0), 1)));
25205 src = replace_equiv_address (src, basereg);
25207 else
25209 rtx basereg = gen_rtx_REG (Pmode, reg);
25210 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
25211 src = replace_equiv_address (src, basereg);
25215 breg = XEXP (src, 0);
25216 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
25217 breg = XEXP (breg, 0);
25219 /* If the base register we are using to address memory is
25220 also a destination reg, then change that register last. */
25221 if (REG_P (breg)
25222 && REGNO (breg) >= REGNO (dst)
25223 && REGNO (breg) < REGNO (dst) + nregs)
25224 j = REGNO (breg) - REGNO (dst);
25226 else if (MEM_P (dst) && INT_REGNO_P (reg))
25228 rtx breg;
25230 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
25231 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
25233 rtx delta_rtx;
25234 breg = XEXP (XEXP (dst, 0), 0);
25235 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
25236 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
25237 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
25239 /* We have to update the breg before doing the store.
25240 Use store with update, if available. */
25242 if (TARGET_UPDATE)
25244 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25245 emit_insn (TARGET_32BIT
25246 ? (TARGET_POWERPC64
25247 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
25248 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
25249 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
25250 used_update = true;
25252 else
25253 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25254 dst = replace_equiv_address (dst, breg);
25256 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
25257 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
25259 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
25261 rtx basereg = XEXP (XEXP (dst, 0), 0);
25262 if (TARGET_UPDATE)
25264 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25265 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
25266 XEXP (dst, 0)),
25267 nsrc));
25268 used_update = true;
25270 else
25271 emit_insn (gen_rtx_SET (basereg,
25272 XEXP (XEXP (dst, 0), 1)));
25273 dst = replace_equiv_address (dst, basereg);
25275 else
25277 rtx basereg = XEXP (XEXP (dst, 0), 0);
25278 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
25279 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
25280 && REG_P (basereg)
25281 && REG_P (offsetreg)
25282 && REGNO (basereg) != REGNO (offsetreg));
25283 if (REGNO (basereg) == 0)
25285 rtx tmp = offsetreg;
25286 offsetreg = basereg;
25287 basereg = tmp;
25289 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
25290 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
25291 dst = replace_equiv_address (dst, basereg);
25294 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
25295 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
25298 for (i = 0; i < nregs; i++)
25300 /* Calculate index to next subword. */
25301 ++j;
25302 if (j == nregs)
25303 j = 0;
25305 /* If compiler already emitted move of first word by
25306 store with update, no need to do anything. */
25307 if (j == 0 && used_update)
25308 continue;
25310 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25311 j * reg_mode_size),
25312 simplify_gen_subreg (reg_mode, src, mode,
25313 j * reg_mode_size)));
25315 if (restore_basereg != NULL_RTX)
25316 emit_insn (restore_basereg);
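/* Example of the backwards copy above (illustrative): a TImode move in
   64-bit code from r3:r4 into r4:r5 has REGNO (src) < REGNO (dst), so
   the subwords are emitted last-first,

	mr r5,r4
	mr r4,r3

   which keeps r4 from being clobbered before it is read.  */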
25321 /* This page contains routines that are used to determine what the
25322 function prologue and epilogue code will do, and to write that code out. */
25324 static inline bool
25325 save_reg_p (int r)
25327 return !call_used_regs[r] && df_regs_ever_live_p (r);
25330 /* Determine whether the gp REG is really used. */
25332 static bool
25333 rs6000_reg_live_or_pic_offset_p (int reg)
25335 /* We need to mark the PIC offset register live for the same conditions
25336 as it is set up, or otherwise it won't be saved before we clobber it. */
25338 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
25340 if (TARGET_TOC && TARGET_MINIMAL_TOC
25341 && (crtl->calls_eh_return
25342 || df_regs_ever_live_p (reg)
25343 || get_pool_size ()))
25344 return true;
25346 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
25347 && flag_pic)
25348 return true;
25351 /* If the function calls eh_return, claim used all the registers that would
25352 be checked for liveness otherwise. */
25354 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
25355 && !call_used_regs[reg]);
25358 /* Return the first fixed-point register that is required to be
25359 saved. 32 if none. */
25362 first_reg_to_save (void)
25364 int first_reg;
25366 /* Find lowest numbered live register. */
25367 for (first_reg = 13; first_reg <= 31; first_reg++)
25368 if (save_reg_p (first_reg))
25369 break;
25371 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
25372 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
25373 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25374 || (TARGET_TOC && TARGET_MINIMAL_TOC))
25375 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
25376 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
25378 #if TARGET_MACHO
25379 if (flag_pic
25380 && crtl->uses_pic_offset_table
25381 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
25382 return RS6000_PIC_OFFSET_TABLE_REGNUM;
25383 #endif
25385 return first_reg;
25388 /* Similar, for FP regs. */
25391 first_fp_reg_to_save (void)
25393 int first_reg;
25395 /* Find lowest numbered live register. */
25396 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
25397 if (save_reg_p (first_reg))
25398 break;
25400 return first_reg;
25403 /* Similar, for AltiVec regs. */
25405 static int
25406 first_altivec_reg_to_save (void)
25408 int i;
25410 /* Stack frame remains as is unless we are in AltiVec ABI. */
25411 if (! TARGET_ALTIVEC_ABI)
25412 return LAST_ALTIVEC_REGNO + 1;
25414 /* On Darwin, the unwind routines are compiled without
25415 TARGET_ALTIVEC, and use save_world to save/restore the
25416 altivec registers when necessary. */
25417 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25418 && ! TARGET_ALTIVEC)
25419 return FIRST_ALTIVEC_REGNO + 20;
25421 /* Find lowest numbered live register. */
25422 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
25423 if (save_reg_p (i))
25424 break;
25426 return i;
25429 /* Return a 32-bit mask of the AltiVec registers we need to set in
25430 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
25431 the 32-bit word is 0. */
25433 static unsigned int
25434 compute_vrsave_mask (void)
25436 unsigned int i, mask = 0;
25438 /* On Darwin, the unwind routines are compiled without
25439 TARGET_ALTIVEC, and use save_world to save/restore the
25440 call-saved altivec registers when necessary. */
25441 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25442 && ! TARGET_ALTIVEC)
25443 mask |= 0xFFF;
25445 /* First, find out if we use _any_ altivec registers. */
25446 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25447 if (df_regs_ever_live_p (i))
25448 mask |= ALTIVEC_REG_BIT (i);
25450 if (mask == 0)
25451 return mask;
25453 /* Next, remove the argument registers from the set. These must
25454 be in the VRSAVE mask set by the caller, so we don't need to add
25455 them in again. More importantly, the mask we compute here is
25456 used to generate CLOBBERs in the set_vrsave insn, and we do not
25457 wish the argument registers to die. */
25458 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
25459 mask &= ~ALTIVEC_REG_BIT (i);
25461 /* Similarly, remove the return value from the set. */
25463 bool yes = false;
25464 diddle_return_value (is_altivec_return_reg, &yes);
25465 if (yes)
25466 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
25469 return mask;
25472 /* For a very restricted set of circumstances, we can cut down the
25473 size of prologues/epilogues by calling our own save/restore-the-world
25474 routines. */
25476 static void
25477 compute_save_world_info (rs6000_stack_t *info)
25479 info->world_save_p = 1;
25480 info->world_save_p
25481 = (WORLD_SAVE_P (info)
25482 && DEFAULT_ABI == ABI_DARWIN
25483 && !cfun->has_nonlocal_label
25484 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
25485 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
25486 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
25487 && info->cr_save_p);
25489 /* This will not work in conjunction with sibcalls. Make sure there
25490 are none. (This check is expensive, but seldom executed.) */
25491 if (WORLD_SAVE_P (info))
25493 rtx_insn *insn;
25494 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
25495 if (CALL_P (insn) && SIBLING_CALL_P (insn))
25497 info->world_save_p = 0;
25498 break;
25502 if (WORLD_SAVE_P (info))
25504 /* Even if we're not touching VRsave, make sure there's room on the
25505 stack for it, if it looks like we're calling SAVE_WORLD, which
25506 will attempt to save it. */
25507 info->vrsave_size = 4;
25509 /* If we are going to save the world, we need to save the link register too. */
25510 info->lr_save_p = 1;
25512 /* "Save" the VRsave register too if we're saving the world. */
25513 if (info->vrsave_mask == 0)
25514 info->vrsave_mask = compute_vrsave_mask ();
25516 /* Because the Darwin register save/restore routines only handle
25517 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
25518 check. */
25519 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
25520 && (info->first_altivec_reg_save
25521 >= FIRST_SAVED_ALTIVEC_REGNO));
25524 return;
25528 static void
25529 is_altivec_return_reg (rtx reg, void *xyes)
25531 bool *yes = (bool *) xyes;
25532 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
25533 *yes = true;
25537 /* Return whether REG is a global user reg or has been specified by
25538 -ffixed-REG. We should not restore these, and so cannot use
25539 lmw or out-of-line restore functions if there are any. We also
25540 can't save them (well, emit frame notes for them), because frame
25541 unwinding during exception handling will restore saved registers. */
25543 static bool
25544 fixed_reg_p (int reg)
25546 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
25547 backend sets it, overriding anything the user might have given. */
25548 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
25549 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
25550 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25551 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
25552 return false;
25554 return fixed_regs[reg];
25557 /* Determine the strategy for saving/restoring registers. */
25559 enum {
25560 SAVE_MULTIPLE = 0x1,
25561 SAVE_INLINE_GPRS = 0x2,
25562 SAVE_INLINE_FPRS = 0x4,
25563 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
25564 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
25565 SAVE_INLINE_VRS = 0x20,
25566 REST_MULTIPLE = 0x100,
25567 REST_INLINE_GPRS = 0x200,
25568 REST_INLINE_FPRS = 0x400,
25569 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
25570 REST_INLINE_VRS = 0x1000
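/* These flags are OR'd into a single strategy bitmask by
   rs6000_savres_strategy below; e.g. a function that saves GPRs inline
   with store-multiple but restores FPRs out of line would have
   (SAVE_INLINE_GPRS | SAVE_MULTIPLE) set and REST_INLINE_FPRS clear.  */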
25573 static int
25574 rs6000_savres_strategy (rs6000_stack_t *info,
25575 bool using_static_chain_p)
25577 int strategy = 0;
25579 /* Select between in-line and out-of-line save and restore of regs.
25580 First, all the obvious cases where we don't use out-of-line. */
25581 if (crtl->calls_eh_return
25582 || cfun->machine->ra_need_lr)
25583 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
25584 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
25585 | SAVE_INLINE_VRS | REST_INLINE_VRS);
25587 if (info->first_gp_reg_save == 32)
25588 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25590 if (info->first_fp_reg_save == 64
25591 /* The out-of-line FP routines use double-precision stores;
25592 we can't use those routines if we don't have such stores. */
25593 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
25594 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25596 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
25597 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25599 /* Define cutoff for using out-of-line functions to save registers. */
25600 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
25602 if (!optimize_size)
25604 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25605 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25606 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25608 else
25610 /* Prefer out-of-line restore if it will exit. */
25611 if (info->first_fp_reg_save > 61)
25612 strategy |= SAVE_INLINE_FPRS;
25613 if (info->first_gp_reg_save > 29)
25615 if (info->first_fp_reg_save == 64)
25616 strategy |= SAVE_INLINE_GPRS;
25617 else
25618 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25620 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
25621 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25624 else if (DEFAULT_ABI == ABI_DARWIN)
25626 if (info->first_fp_reg_save > 60)
25627 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25628 if (info->first_gp_reg_save > 29)
25629 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25630 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25632 else
25634 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25635 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
25636 || info->first_fp_reg_save > 61)
25637 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25638 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25639 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25642 /* Don't bother to try to save things out-of-line if r11 is occupied
25643 by the static chain. It would require too much fiddling and the
25644 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
25645 pointer on Darwin, and AIX uses r1 or r12. */
25646 if (using_static_chain_p
25647 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25648 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
25649 | SAVE_INLINE_GPRS
25650 | SAVE_INLINE_VRS);
25652 /* Saving CR interferes with the exit routines used on the SPE, so
25653 just punt here. */
25654 if (TARGET_SPE_ABI
25655 && info->spe_64bit_regs_used
25656 && info->cr_save_p)
25657 strategy |= REST_INLINE_GPRS;
25659 /* We can only use the out-of-line routines to restore fprs if we've
25660 saved all the registers from first_fp_reg_save in the prologue.
25661 Otherwise, we risk loading garbage. Of course, if we have saved
25662 out-of-line then we know we haven't skipped any fprs. */
25663 if ((strategy & SAVE_INLINE_FPRS)
25664 && !(strategy & REST_INLINE_FPRS))
25666 int i;
25668 for (i = info->first_fp_reg_save; i < 64; i++)
25669 if (fixed_regs[i] || !save_reg_p (i))
25671 strategy |= REST_INLINE_FPRS;
25672 break;
25676 /* Similarly, for altivec regs. */
25677 if ((strategy & SAVE_INLINE_VRS)
25678 && !(strategy & REST_INLINE_VRS))
25680 int i;
25682 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
25683 if (fixed_regs[i] || !save_reg_p (i))
25685 strategy |= REST_INLINE_VRS;
25686 break;
25690 /* info->lr_save_p isn't yet set if the only reason lr needs to be
25691 saved is an out-of-line save or restore. Set up the value for
25692 the next test (excluding out-of-line gprs). */
25693 bool lr_save_p = (info->lr_save_p
25694 || !(strategy & SAVE_INLINE_FPRS)
25695 || !(strategy & SAVE_INLINE_VRS)
25696 || !(strategy & REST_INLINE_FPRS)
25697 || !(strategy & REST_INLINE_VRS));
25699 if (TARGET_MULTIPLE
25700 && !TARGET_POWERPC64
25701 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
25702 && info->first_gp_reg_save < 31
25703 && !(flag_shrink_wrap
25704 && flag_shrink_wrap_separate
25705 && optimize_function_for_speed_p (cfun)))
25707 /* Prefer store multiple for saves over out-of-line routines,
25708 since the store-multiple instruction will always be smaller. */
25709 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
25711 /* The situation is more complicated with load multiple. We'd
25712 prefer to use the out-of-line routines for restores, since the
25713 "exit" out-of-line routines can handle the restore of LR and the
25714 frame teardown. However, it doesn't make sense to use the
25715 out-of-line routine if that is the only reason we'd need to save
25716 LR, and we can't use the "exit" out-of-line gpr restore if we
25717 have saved some fprs; in those cases it is advantageous to use
25718 load multiple when available. */
25719 if (info->first_fp_reg_save != 64 || !lr_save_p)
25720 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
25723 /* Using the "exit" out-of-line routine does not improve code size
25724 if using it would require lr to be saved and if only saving one
25725 or two gprs. */
25726 else if (!lr_save_p && info->first_gp_reg_save > 29)
25727 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25729 /* We can only use load multiple or the out-of-line routines to
25730 restore gprs if we've saved all the registers from
25731 first_gp_reg_save. Otherwise, we risk loading garbage.
25732 Of course, if we have saved out-of-line or used stmw then we know
25733 we haven't skipped any gprs. */
25734 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
25735 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
25737 int i;
25739 for (i = info->first_gp_reg_save; i < 32; i++)
25740 if (fixed_reg_p (i) || !save_reg_p (i))
25742 strategy |= REST_INLINE_GPRS;
25743 strategy &= ~REST_MULTIPLE;
25744 break;
25748 if (TARGET_ELF && TARGET_64BIT)
25750 if (!(strategy & SAVE_INLINE_FPRS))
25751 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25752 else if (!(strategy & SAVE_INLINE_GPRS)
25753 && info->first_fp_reg_save == 64)
25754 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
25756 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
25757 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
25759 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
25760 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25762 return strategy;
25765 /* Calculate the stack information for the current function. This is
25766 complicated by having two separate calling sequences, the AIX calling
25767 sequence and the V.4 calling sequence.
25769 AIX (and Darwin/Mac OS X) stack frames look like:
25770 32-bit 64-bit
25771 SP----> +---------------------------------------+
25772 | back chain to caller | 0 0
25773 +---------------------------------------+
25774 | saved CR | 4 8 (8-11)
25775 +---------------------------------------+
25776 | saved LR | 8 16
25777 +---------------------------------------+
25778 | reserved for compilers | 12 24
25779 +---------------------------------------+
25780 | reserved for binders | 16 32
25781 +---------------------------------------+
25782 | saved TOC pointer | 20 40
25783 +---------------------------------------+
25784 | Parameter save area (P) | 24 48
25785 +---------------------------------------+
25786 | Alloca space (A) | 24+P etc.
25787 +---------------------------------------+
25788 | Local variable space (L) | 24+P+A
25789 +---------------------------------------+
25790 | Float/int conversion temporary (X) | 24+P+A+L
25791 +---------------------------------------+
25792 | Save area for AltiVec registers (W) | 24+P+A+L+X
25793 +---------------------------------------+
25794 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
25795 +---------------------------------------+
25796 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
25797 +---------------------------------------+
25798 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
25799 +---------------------------------------+
25800 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
25801 +---------------------------------------+
25802 old SP->| back chain to caller's caller |
25803 +---------------------------------------+
25805 The required alignment for AIX configurations is two words (i.e., 8
25806 or 16 bytes).
25808 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
25810 SP----> +---------------------------------------+
25811 | Back chain to caller | 0
25812 +---------------------------------------+
25813 | Save area for CR | 8
25814 +---------------------------------------+
25815 | Saved LR | 16
25816 +---------------------------------------+
25817 | Saved TOC pointer | 24
25818 +---------------------------------------+
25819 | Parameter save area (P) | 32
25820 +---------------------------------------+
25821 | Alloca space (A) | 32+P
25822 +---------------------------------------+
25823 | Local variable space (L) | 32+P+A
25824 +---------------------------------------+
25825 | Save area for AltiVec registers (W) | 32+P+A+L
25826 +---------------------------------------+
25827 | AltiVec alignment padding (Y) | 32+P+A+L+W
25828 +---------------------------------------+
25829 | Save area for GP registers (G) | 32+P+A+L+W+Y
25830 +---------------------------------------+
25831 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
25832 +---------------------------------------+
25833 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
25834 +---------------------------------------+
25837 V.4 stack frames look like:
25839 SP----> +---------------------------------------+
25840 | back chain to caller | 0
25841 +---------------------------------------+
25842 | caller's saved LR | 4
25843 +---------------------------------------+
25844 | Parameter save area (P) | 8
25845 +---------------------------------------+
25846 | Alloca space (A) | 8+P
25847 +---------------------------------------+
25848 | Varargs save area (V) | 8+P+A
25849 +---------------------------------------+
25850 | Local variable space (L) | 8+P+A+V
25851 +---------------------------------------+
25852 | Float/int conversion temporary (X) | 8+P+A+V+L
25853 +---------------------------------------+
25854 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
25855 +---------------------------------------+
25856 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
25857 +---------------------------------------+
25858 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
25859 +---------------------------------------+
25860 | SPE: area for 64-bit GP registers |
25861 +---------------------------------------+
25862 | SPE alignment padding |
25863 +---------------------------------------+
25864 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
25865 +---------------------------------------+
25866 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
25867 +---------------------------------------+
25868 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
25869 +---------------------------------------+
25870 old SP->| back chain to caller's caller |
25871 +---------------------------------------+
25873 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
25874 given. (But note below and in sysv4.h that we require only 8 and
25875 may round up the size of our stack frame anyway. The historical
25876 reason is early versions of powerpc-linux which didn't properly
25877 align the stack at program startup. A happy side-effect is that
25878 -mno-eabi libraries can be used with -meabi programs.)
25880 The EABI configuration defaults to the V.4 layout. However,
25881 the stack alignment requirements may differ. If -mno-eabi is not
25882 given, the required stack alignment is 8 bytes; if -mno-eabi is
25883 given, the required alignment is 16 bytes. (But see V.4 comment
25884 above.) */
25886 #ifndef ABI_STACK_BOUNDARY
25887 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
25888 #endif
25890 static rs6000_stack_t *
25891 rs6000_stack_info (void)
25893 /* We should never be called for thunks, we are not set up for that. */
25894 gcc_assert (!cfun->is_thunk);
25896 rs6000_stack_t *info = &stack_info;
25897 int reg_size = TARGET_32BIT ? 4 : 8;
25898 int ehrd_size;
25899 int ehcr_size;
25900 int save_align;
25901 int first_gp;
25902 HOST_WIDE_INT non_fixed_size;
25903 bool using_static_chain_p;
25905 if (reload_completed && info->reload_completed)
25906 return info;
25908 memset (info, 0, sizeof (*info));
25909 info->reload_completed = reload_completed;
25911 if (TARGET_SPE)
25913 /* Cache value so we don't rescan instruction chain over and over. */
25914 if (cfun->machine->spe_insn_chain_scanned_p == 0)
25915 cfun->machine->spe_insn_chain_scanned_p
25916 = spe_func_has_64bit_regs_p () + 1;
25917 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
25920 /* Select which calling sequence. */
25921 info->abi = DEFAULT_ABI;
25923 /* Calculate which registers need to be saved & save area size. */
25924 info->first_gp_reg_save = first_reg_to_save ();
25925 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
25926 even if it currently looks like we won't. Reload may need it to
25927 get at a constant; if so, it will have already created a constant
25928 pool entry for it. */
25929 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
25930 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25931 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25932 && crtl->uses_const_pool
25933 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
25934 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
25935 else
25936 first_gp = info->first_gp_reg_save;
25938 info->gp_size = reg_size * (32 - first_gp);
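/* For example, on a 64-bit target where first_gp is 28, the GP save
   area is (32 - 28) * 8 == 32 bytes.  */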
25940 /* For the SPE, we have an additional upper 32-bits on each GPR.
25941 Ideally we should save the entire 64-bits only when the upper
25942 half is used in SIMD instructions. Since we only record
25943 registers live (not the size they are used in), this proves
25944 difficult because we'd have to traverse the instruction chain at
25945 the right time, taking reload into account. This is a real pain,
25946 so we opt to save all the GPRs in 64-bits if even one register
25947 is used in 64-bits. Otherwise, all the registers in the frame
25948 get saved in 32-bits.
25950 So, when we save all GPRs (except the SP) in 64-bits, the
25951 traditional GP save area will be empty. */
25952 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25953 info->gp_size = 0;
25955 info->first_fp_reg_save = first_fp_reg_to_save ();
25956 info->fp_size = 8 * (64 - info->first_fp_reg_save);
25958 info->first_altivec_reg_save = first_altivec_reg_to_save ();
25959 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
25960 - info->first_altivec_reg_save);
25962 /* Does this function call anything? */
25963 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
25965 /* Determine if we need to save the condition code registers. */
25966 if (save_reg_p (CR2_REGNO)
25967 || save_reg_p (CR3_REGNO)
25968 || save_reg_p (CR4_REGNO))
25970 info->cr_save_p = 1;
25971 if (DEFAULT_ABI == ABI_V4)
25972 info->cr_size = reg_size;
25975 /* If the current function calls __builtin_eh_return, then we need
25976 to allocate stack space for registers that will hold data for
25977 the exception handler. */
25978 if (crtl->calls_eh_return)
25980 unsigned int i;
25981 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
25982 continue;
25984 /* SPE saves EH registers in 64-bits. */
25985 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
25986 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
25988 else
25989 ehrd_size = 0;
25991 /* In the ELFv2 ABI, we also need to allocate space for separate
25992 CR field save areas if the function calls __builtin_eh_return. */
25993 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25995 /* This hard-codes that we have three call-saved CR fields. */
25996 ehcr_size = 3 * reg_size;
25997 /* We do *not* use the regular CR save mechanism. */
25998 info->cr_save_p = 0;
26000 else
26001 ehcr_size = 0;
26003 /* Determine various sizes. */
26004 info->reg_size = reg_size;
26005 info->fixed_size = RS6000_SAVE_AREA;
26006 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
26007 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
26008 TARGET_ALTIVEC ? 16 : 8);
26009 if (FRAME_GROWS_DOWNWARD)
26010 info->vars_size
26011 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
26012 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
26013 - (info->fixed_size + info->vars_size + info->parm_size);
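/* Editorial illustration: with ABI_STACK_BOUNDARY of 128 bits (16
   bytes), fixed_size 8, vars_size 20 and parm_size 32 sum to 60,
   which rounds up to 64, so 4 bytes of padding are folded into
   vars_size here.  */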
26015 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26016 info->spe_gp_size = 8 * (32 - first_gp);
26018 if (TARGET_ALTIVEC_ABI)
26019 info->vrsave_mask = compute_vrsave_mask ();
26021 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
26022 info->vrsave_size = 4;
26024 compute_save_world_info (info);
26026 /* Calculate the offsets. */
26027 switch (DEFAULT_ABI)
26029 case ABI_NONE:
26030 default:
26031 gcc_unreachable ();
26033 case ABI_AIX:
26034 case ABI_ELFv2:
26035 case ABI_DARWIN:
26036 info->fp_save_offset = -info->fp_size;
26037 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26039 if (TARGET_ALTIVEC_ABI)
26041 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
26043 /* Align stack so vector save area is on a quadword boundary.
26044 The padding goes above the vectors. */
26045 if (info->altivec_size != 0)
26046 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
26048 info->altivec_save_offset = info->vrsave_save_offset
26049 - info->altivec_padding_size
26050 - info->altivec_size;
26051 gcc_assert (info->altivec_size == 0
26052 || info->altivec_save_offset % 16 == 0);
26054 /* Adjust for AltiVec case. */
26055 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
26057 else
26058 info->ehrd_offset = info->gp_save_offset - ehrd_size;
26060 info->ehcr_offset = info->ehrd_offset - ehcr_size;
26061 info->cr_save_offset = reg_size; /* first word when 64-bit. */
26062 info->lr_save_offset = 2*reg_size;
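/* Note that cr_save_offset and lr_save_offset are positive: in the
   AIX-style layouts CR and LR live in the fixed frame header (at
   reg_size and 2*reg_size from the frame base), while the FP/GP/
   AltiVec save areas computed above grow downward and so have
   negative offsets.  */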
26063 break;
26065 case ABI_V4:
26066 info->fp_save_offset = -info->fp_size;
26067 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26068 info->cr_save_offset = info->gp_save_offset - info->cr_size;
26070 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26072 /* Align stack so SPE GPR save area is aligned on a
26073 double-word boundary. */
26074 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
26075 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
26076 else
26077 info->spe_padding_size = 0;
26079 info->spe_gp_save_offset = info->cr_save_offset
26080 - info->spe_padding_size
26081 - info->spe_gp_size;
26083 /* Adjust for SPE case. */
26084 info->ehrd_offset = info->spe_gp_save_offset;
26086 else if (TARGET_ALTIVEC_ABI)
26088 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
26090 /* Align stack so vector save area is on a quadword boundary. */
26091 if (info->altivec_size != 0)
26092 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
26094 info->altivec_save_offset = info->vrsave_save_offset
26095 - info->altivec_padding_size
26096 - info->altivec_size;
26098 /* Adjust for AltiVec case. */
26099 info->ehrd_offset = info->altivec_save_offset;
26101 else
26102 info->ehrd_offset = info->cr_save_offset;
26104 info->ehrd_offset -= ehrd_size;
26105 info->lr_save_offset = reg_size;
26108 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
26109 info->save_size = RS6000_ALIGN (info->fp_size
26110 + info->gp_size
26111 + info->altivec_size
26112 + info->altivec_padding_size
26113 + info->spe_gp_size
26114 + info->spe_padding_size
26115 + ehrd_size
26116 + ehcr_size
26117 + info->cr_size
26118 + info->vrsave_size,
26119 save_align);
26121 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
26123 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
26124 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
26126 /* Determine if we need to save the link register. */
26127 if (info->calls_p
26128 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26129 && crtl->profile
26130 && !TARGET_PROFILE_KERNEL)
26131 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
26132 #ifdef TARGET_RELOCATABLE
26133 || (DEFAULT_ABI == ABI_V4
26134 && (TARGET_RELOCATABLE || flag_pic > 1)
26135 && get_pool_size () != 0)
26136 #endif
26137 || rs6000_ra_ever_killed ())
26138 info->lr_save_p = 1;
26140 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26141 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26142 && call_used_regs[STATIC_CHAIN_REGNUM]);
26143 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
26145 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
26146 || !(info->savres_strategy & SAVE_INLINE_FPRS)
26147 || !(info->savres_strategy & SAVE_INLINE_VRS)
26148 || !(info->savres_strategy & REST_INLINE_GPRS)
26149 || !(info->savres_strategy & REST_INLINE_FPRS)
26150 || !(info->savres_strategy & REST_INLINE_VRS))
26151 info->lr_save_p = 1;
26153 if (info->lr_save_p)
26154 df_set_regs_ever_live (LR_REGNO, true);
26156 /* Determine if we need to allocate any stack frame:
26158 For AIX we need to push the stack if a frame pointer is needed
26159 (because the stack might be dynamically adjusted), if we are
26160 debugging, if we make calls, or if the sum of fp_save, gp_save,
26161 and local variables is more than the space needed to save all
26162 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
26163 + 18*8 = 288 (GPR13 reserved).
26165 For V.4 we don't have the stack cushion that AIX uses, but assume
26166 that the debugger can handle stackless frames. */
26168 if (info->calls_p)
26169 info->push_p = 1;
26171 else if (DEFAULT_ABI == ABI_V4)
26172 info->push_p = non_fixed_size != 0;
26174 else if (frame_pointer_needed)
26175 info->push_p = 1;
26177 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
26178 info->push_p = 1;
26180 else
26181 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
26183 return info;
26186 /* Return true if the current function uses any GPRs in 64-bit SIMD
26187 mode. */
26189 static bool
26190 spe_func_has_64bit_regs_p (void)
26192 rtx_insn *insns, *insn;
26194 /* Functions that save and restore all the call-saved registers will
26195 need to save/restore the registers in 64-bits. */
26196 if (crtl->calls_eh_return
26197 || cfun->calls_setjmp
26198 || crtl->has_nonlocal_goto)
26199 return true;
26201 insns = get_insns ();
26203 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
26205 if (INSN_P (insn))
26207 rtx i;
26209 /* FIXME: This should be implemented with attributes...
26211 (set_attr "spe64" "true")....then,
26212 if (get_spe64(insn)) return true;
26214 It's the only reliable way to do the stuff below. */
26216 i = PATTERN (insn);
26217 if (GET_CODE (i) == SET)
26219 machine_mode mode = GET_MODE (SET_SRC (i));
26221 if (SPE_VECTOR_MODE (mode))
26222 return true;
26223 if (TARGET_E500_DOUBLE
26224 && (mode == DFmode || FLOAT128_2REG_P (mode)))
26225 return true;
26230 return false;
26233 static void
26234 debug_stack_info (rs6000_stack_t *info)
26236 const char *abi_string;
26238 if (! info)
26239 info = rs6000_stack_info ();
26241 fprintf (stderr, "\nStack information for function %s:\n",
26242 ((current_function_decl && DECL_NAME (current_function_decl))
26243 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
26244 : "<unknown>"));
26246 switch (info->abi)
26248 default: abi_string = "Unknown"; break;
26249 case ABI_NONE: abi_string = "NONE"; break;
26250 case ABI_AIX: abi_string = "AIX"; break;
26251 case ABI_ELFv2: abi_string = "ELFv2"; break;
26252 case ABI_DARWIN: abi_string = "Darwin"; break;
26253 case ABI_V4: abi_string = "V.4"; break;
26256 fprintf (stderr, "\tABI = %5s\n", abi_string);
26258 if (TARGET_ALTIVEC_ABI)
26259 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
26261 if (TARGET_SPE_ABI)
26262 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
26264 if (info->first_gp_reg_save != 32)
26265 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
26267 if (info->first_fp_reg_save != 64)
26268 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
26270 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
26271 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
26272 info->first_altivec_reg_save);
26274 if (info->lr_save_p)
26275 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
26277 if (info->cr_save_p)
26278 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
26280 if (info->vrsave_mask)
26281 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
26283 if (info->push_p)
26284 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
26286 if (info->calls_p)
26287 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
26289 if (info->gp_size)
26290 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
26292 if (info->fp_size)
26293 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
26295 if (info->altivec_size)
26296 fprintf (stderr, "\taltivec_save_offset = %5d\n",
26297 info->altivec_save_offset);
26299 if (info->spe_gp_size)
26300 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
26301 info->spe_gp_save_offset);
26303 if (info->vrsave_size)
26304 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
26305 info->vrsave_save_offset);
26307 if (info->lr_save_p)
26308 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
26310 if (info->cr_save_p)
26311 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
26313 if (info->varargs_save_offset)
26314 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
26316 if (info->total_size)
26317 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26318 info->total_size);
26320 if (info->vars_size)
26321 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26322 info->vars_size);
26324 if (info->parm_size)
26325 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
26327 if (info->fixed_size)
26328 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
26330 if (info->gp_size)
26331 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
26333 if (info->spe_gp_size)
26334 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
26336 if (info->fp_size)
26337 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
26339 if (info->altivec_size)
26340 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
26342 if (info->vrsave_size)
26343 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
26345 if (info->altivec_padding_size)
26346 fprintf (stderr, "\taltivec_padding_size= %5d\n",
26347 info->altivec_padding_size);
26349 if (info->spe_padding_size)
26350 fprintf (stderr, "\tspe_padding_size = %5d\n",
26351 info->spe_padding_size);
26353 if (info->cr_size)
26354 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
26356 if (info->save_size)
26357 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
26359 if (info->reg_size != 4)
26360 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
26362 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
26364 fprintf (stderr, "\n");
26367 rtx
26368 rs6000_return_addr (int count, rtx frame)
26370 /* Currently we don't optimize very well between prologue and body
26371 code, and for PIC code the result can actually be quite bad, so
26372 don't try to be too clever here. */
26373 if (count != 0
26374 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
26376 cfun->machine->ra_needs_full_frame = 1;
26378 return
26379 gen_rtx_MEM
26380 (Pmode,
26381 memory_address
26382 (Pmode,
26383 plus_constant (Pmode,
26384 copy_to_reg
26385 (gen_rtx_MEM (Pmode,
26386 memory_address (Pmode, frame))),
26387 RETURN_ADDRESS_OFFSET)));
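/* The expression above follows the back chain: it loads the word at
   *FRAME to get the next outer frame's base, then reads the saved LR
   word at RETURN_ADDRESS_OFFSET bytes from that base.  */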
26390 cfun->machine->ra_need_lr = 1;
26391 return get_hard_reg_initial_val (Pmode, LR_REGNO);
26394 /* Say whether a function is a candidate for sibcall handling or not. */
26396 static bool
26397 rs6000_function_ok_for_sibcall (tree decl, tree exp)
26399 tree fntype;
26401 if (decl)
26402 fntype = TREE_TYPE (decl);
26403 else
26404 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
26406 /* We can't do it if the called function has more vector parameters
26407 than the current function; there's nowhere to put the VRsave code. */
26408 if (TARGET_ALTIVEC_ABI
26409 && TARGET_ALTIVEC_VRSAVE
26410 && !(decl && decl == current_function_decl))
26412 function_args_iterator args_iter;
26413 tree type;
26414 int nvreg = 0;
26416 /* Functions with vector parameters are required to have a
26417 prototype, so the argument type info must be available
26418 here. */
26419 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
26420 if (TREE_CODE (type) == VECTOR_TYPE
26421 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26422 nvreg++;
26424 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
26425 if (TREE_CODE (type) == VECTOR_TYPE
26426 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26427 nvreg--;
26429 if (nvreg > 0)
26430 return false;
26433 /* Under the AIX or ELFv2 ABIs we can't allow sibcalls to non-local
26434 functions, because the callee may have a different TOC pointer from
26435 the caller and there's no way to ensure we restore the TOC when
26436 we return. With the secure-plt SYSV ABI we can't make non-local
26437 sibcalls when compiling -fpic/-fPIC because the plt call stubs use r30. */
26438 if (DEFAULT_ABI == ABI_DARWIN
26439 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26440 && decl
26441 && !DECL_EXTERNAL (decl)
26442 && !DECL_WEAK (decl)
26443 && (*targetm.binds_local_p) (decl))
26444 || (DEFAULT_ABI == ABI_V4
26445 && (!TARGET_SECURE_PLT
26446 || !flag_pic
26447 || (decl
26448 && (*targetm.binds_local_p) (decl)))))
26450 tree attr_list = TYPE_ATTRIBUTES (fntype);
26452 if (!lookup_attribute ("longcall", attr_list)
26453 || lookup_attribute ("shortcall", attr_list))
26454 return true;
26457 return false;
26460 static int
26461 rs6000_ra_ever_killed (void)
26463 rtx_insn *top;
26464 rtx reg;
26465 rtx_insn *insn;
26467 if (cfun->is_thunk)
26468 return 0;
26470 if (cfun->machine->lr_save_state)
26471 return cfun->machine->lr_save_state - 1;
26473 /* regs_ever_live has LR marked as used if any sibcalls are present,
26474 but this should not force saving and restoring in the
26475 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
26476 clobbers LR, so that is inappropriate. */
26478 /* Also, the prologue can generate a store into LR that
26479 doesn't really count, like this:
26481 move LR->R0
26482 bcl to set PIC register
26483 move LR->R31
26484 move R0->LR
26486 When we're called from the epilogue, we need to avoid counting
26487 this as a store. */
26489 push_topmost_sequence ();
26490 top = get_insns ();
26491 pop_topmost_sequence ();
26492 reg = gen_rtx_REG (Pmode, LR_REGNO);
26494 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
26496 if (INSN_P (insn))
26498 if (CALL_P (insn))
26500 if (!SIBLING_CALL_P (insn))
26501 return 1;
26503 else if (find_regno_note (insn, REG_INC, LR_REGNO))
26504 return 1;
26505 else if (set_of (reg, insn) != NULL_RTX
26506 && !prologue_epilogue_contains (insn))
26507 return 1;
26510 return 0;
26513 /* Emit instructions needed to load the TOC register.
26514 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
26515 there is a constant pool; or for SVR4 -fpic. */
26517 void
26518 rs6000_emit_load_toc_table (int fromprolog)
26520 rtx dest;
26521 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26523 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
26525 char buf[30];
26526 rtx lab, tmp1, tmp2, got;
26528 lab = gen_label_rtx ();
26529 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
26530 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26531 if (flag_pic == 2)
26533 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26534 need_toc_init = 1;
26536 else
26537 got = rs6000_got_sym ();
26538 tmp1 = tmp2 = dest;
26539 if (!fromprolog)
26541 tmp1 = gen_reg_rtx (Pmode);
26542 tmp2 = gen_reg_rtx (Pmode);
26544 emit_insn (gen_load_toc_v4_PIC_1 (lab));
26545 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
26546 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
26547 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
26549 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
26551 emit_insn (gen_load_toc_v4_pic_si ());
26552 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26554 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
26556 char buf[30];
26557 rtx temp0 = (fromprolog
26558 ? gen_rtx_REG (Pmode, 0)
26559 : gen_reg_rtx (Pmode));
26561 if (fromprolog)
26563 rtx symF, symL;
26565 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26566 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26568 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26569 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26571 emit_insn (gen_load_toc_v4_PIC_1 (symF));
26572 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26573 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
26575 else
26577 rtx tocsym, lab;
26579 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26580 need_toc_init = 1;
26581 lab = gen_label_rtx ();
26582 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
26583 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26584 if (TARGET_LINK_STACK)
26585 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
26586 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
26588 emit_insn (gen_addsi3 (dest, temp0, dest));
26590 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
26592 /* This is for AIX code running in non-PIC ELF32. */
26593 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26595 need_toc_init = 1;
26596 emit_insn (gen_elf_high (dest, realsym));
26597 emit_insn (gen_elf_low (dest, dest, realsym));
26599 else
26601 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26603 if (TARGET_32BIT)
26604 emit_insn (gen_load_toc_aix_si (dest));
26605 else
26606 emit_insn (gen_load_toc_aix_di (dest));
26610 /* Emit instructions to restore the link register after determining where
26611 its value has been stored. */
26613 void
26614 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
26616 rs6000_stack_t *info = rs6000_stack_info ();
26617 rtx operands[2];
26619 operands[0] = source;
26620 operands[1] = scratch;
26622 if (info->lr_save_p)
26624 rtx frame_rtx = stack_pointer_rtx;
26625 HOST_WIDE_INT sp_offset = 0;
26626 rtx tmp;
26628 if (frame_pointer_needed
26629 || cfun->calls_alloca
26630 || info->total_size > 32767)
26632 tmp = gen_frame_mem (Pmode, frame_rtx);
26633 emit_move_insn (operands[1], tmp);
26634 frame_rtx = operands[1];
26636 else if (info->push_p)
26637 sp_offset = info->total_size;
26639 tmp = plus_constant (Pmode, frame_rtx,
26640 info->lr_save_offset + sp_offset);
26641 tmp = gen_frame_mem (Pmode, tmp);
26642 emit_move_insn (tmp, operands[0]);
26644 else
26645 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
26647 /* Freeze lr_save_p. We've just emitted rtl that depends on the
26648 state of lr_save_p so any change from here on would be a bug. In
26649 particular, stop rs6000_ra_ever_killed from considering the SET
26650 of lr we may have added just above. */
26651 cfun->machine->lr_save_state = info->lr_save_p + 1;
26654 static GTY(()) alias_set_type set = -1;
26656 alias_set_type
26657 get_TOC_alias_set (void)
26659 if (set == -1)
26660 set = new_alias_set ();
26661 return set;
26664 /* This returns nonzero if the current function uses the TOC. This is
26665 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
26666 is generated by the ABI_V4 load_toc_* patterns. */
26667 #if TARGET_ELF
26668 static int
26669 uses_TOC (void)
26671 rtx_insn *insn;
26673 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26674 if (INSN_P (insn))
26676 rtx pat = PATTERN (insn);
26677 int i;
26679 if (GET_CODE (pat) == PARALLEL)
26680 for (i = 0; i < XVECLEN (pat, 0); i++)
26682 rtx sub = XVECEXP (pat, 0, i);
26683 if (GET_CODE (sub) == USE)
26685 sub = XEXP (sub, 0);
26686 if (GET_CODE (sub) == UNSPEC
26687 && XINT (sub, 1) == UNSPEC_TOC)
26688 return 1;
26692 return 0;
26694 #endif
26696 rtx
26697 create_TOC_reference (rtx symbol, rtx largetoc_reg)
26699 rtx tocrel, tocreg, hi;
26701 if (TARGET_DEBUG_ADDR)
26703 if (GET_CODE (symbol) == SYMBOL_REF)
26704 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
26705 XSTR (symbol, 0));
26706 else
26708 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
26709 GET_RTX_NAME (GET_CODE (symbol)));
26710 debug_rtx (symbol);
26714 if (!can_create_pseudo_p ())
26715 df_set_regs_ever_live (TOC_REGISTER, true);
26717 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
26718 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
26719 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
26720 return tocrel;
26722 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
26723 if (largetoc_reg != NULL)
26725 emit_move_insn (largetoc_reg, hi);
26726 hi = largetoc_reg;
26728 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
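/* When the code model is not small and pseudos can no longer be
   created (i.e. after reload), the TOC-relative reference is split:
   the HIGH part is materialized separately (cached in LARGETOC_REG
   when one is provided) and combined with the LO_SUM returned
   above.  */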
26731 /* Issue assembly directives that create a reference to the given DWARF
26732 FRAME_TABLE_LABEL from the current function section. */
26733 void
26734 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
26736 fprintf (asm_out_file, "\t.ref %s\n",
26737 (* targetm.strip_name_encoding) (frame_table_label));
26740 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
26741 and the change to the stack pointer. */
26743 static void
26744 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
26746 rtvec p;
26747 int i;
26748 rtx regs[3];
26750 i = 0;
26751 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26752 if (hard_frame_needed)
26753 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
26754 if (!(REGNO (fp) == STACK_POINTER_REGNUM
26755 || (hard_frame_needed
26756 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
26757 regs[i++] = fp;
26759 p = rtvec_alloc (i);
26760 while (--i >= 0)
26762 rtx mem = gen_frame_mem (BLKmode, regs[i]);
26763 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
26766 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
26769 /* Emit the correct code for allocating stack space, as insns.
26770 If COPY_REG is non-null, a copy of the old stack pointer (offset by COPY_OFF) is left in it.
26771 The generated code may use hard register 0 as a temporary. */
26773 static rtx_insn *
26774 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
26776 rtx_insn *insn;
26777 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26778 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
26779 rtx todec = gen_int_mode (-size, Pmode);
26780 rtx par, set, mem;
26782 if (INTVAL (todec) != -size)
26784 warning (0, "stack frame too large");
26785 emit_insn (gen_trap ());
26786 return 0;
26789 if (crtl->limit_stack)
26791 if (REG_P (stack_limit_rtx)
26792 && REGNO (stack_limit_rtx) > 1
26793 && REGNO (stack_limit_rtx) <= 31)
26795 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
26796 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26797 const0_rtx));
26799 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
26800 && TARGET_32BIT
26801 && DEFAULT_ABI == ABI_V4)
26803 rtx toload = gen_rtx_CONST (VOIDmode,
26804 gen_rtx_PLUS (Pmode,
26805 stack_limit_rtx,
26806 GEN_INT (size)));
26808 emit_insn (gen_elf_high (tmp_reg, toload));
26809 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
26810 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26811 const0_rtx));
26813 else
26814 warning (0, "stack limit expression is not supported");
26817 if (copy_reg)
26819 if (copy_off != 0)
26820 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
26821 else
26822 emit_move_insn (copy_reg, stack_reg);
26825 if (size > 32767)
26827 /* Need a note here so that try_split doesn't get confused. */
26828 if (get_last_insn () == NULL_RTX)
26829 emit_note (NOTE_INSN_DELETED);
26830 insn = emit_move_insn (tmp_reg, todec);
26831 try_split (PATTERN (insn), insn, 0);
26832 todec = tmp_reg;
26835 insn = emit_insn (TARGET_32BIT
26836 ? gen_movsi_update_stack (stack_reg, stack_reg,
26837 todec, stack_reg)
26838 : gen_movdi_di_update_stack (stack_reg, stack_reg,
26839 todec, stack_reg));
26840 /* Since we didn't use gen_frame_mem to generate the MEM, grab
26841 it now and set the alias set/attributes. The above gen_*_update
26842 calls will generate a PARALLEL with the MEM set being the first
26843 operation. */
26844 par = PATTERN (insn);
26845 gcc_assert (GET_CODE (par) == PARALLEL);
26846 set = XVECEXP (par, 0, 0);
26847 gcc_assert (GET_CODE (set) == SET);
26848 mem = SET_DEST (set);
26849 gcc_assert (MEM_P (mem));
26850 MEM_NOTRAP_P (mem) = 1;
26851 set_mem_alias_set (mem, get_frame_alias_set ());
26853 RTX_FRAME_RELATED_P (insn) = 1;
26854 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26855 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
26856 GEN_INT (-size))));
26857 return insn;
26860 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
26862 #if PROBE_INTERVAL > 32768
26863 #error Cannot use indexed addressing mode for stack probing
26864 #endif
26866 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
26867 inclusive. These are offsets from the current stack pointer. */
26869 static void
26870 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
26872 /* See if we have a constant small number of probes to generate. If so,
26873 that's the easy case. */
26874 if (first + size <= 32768)
26876 HOST_WIDE_INT i;
26878 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
26879 it exceeds SIZE. If only one probe is needed, this will not
26880 generate any code. Then probe at FIRST + SIZE. */
26881 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
26882 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26883 -(first + i)));
26885 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26886 -(first + size)));
26889 /* Otherwise, do the same as above, but in a loop. Note that we must be
26890 extra careful with variables wrapping around because we might be at
26891 the very top (or the very bottom) of the address space and we have
26892 to be able to handle this case properly; in particular, we use an
26893 equality test for the loop condition. */
26894 else
26896 HOST_WIDE_INT rounded_size;
26897 rtx r12 = gen_rtx_REG (Pmode, 12);
26898 rtx r0 = gen_rtx_REG (Pmode, 0);
26900 /* Sanity check for the addressing mode we're going to use. */
26901 gcc_assert (first <= 32768);
26903 /* Step 1: round SIZE to the previous multiple of the interval. */
26905 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
26908 /* Step 2: compute initial and final value of the loop counter. */
26910 /* TEST_ADDR = SP + FIRST. */
26911 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
26912 -first)));
26914 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
26915 if (rounded_size > 32768)
26917 emit_move_insn (r0, GEN_INT (-rounded_size));
26918 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
26920 else
26921 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
26922 -rounded_size)));
26925 /* Step 3: the loop
26929 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
26930 probe at TEST_ADDR
26932 while (TEST_ADDR != LAST_ADDR)
26934 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
26935 until it is equal to ROUNDED_SIZE. */
26937 if (TARGET_64BIT)
26938 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
26939 else
26940 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
26943 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
26944 that SIZE is equal to ROUNDED_SIZE. */
26946 if (size != rounded_size)
26947 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
26951 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
26952 absolute addresses. */
26954 const char *
26955 output_probe_stack_range (rtx reg1, rtx reg2)
26957 static int labelno = 0;
26958 char loop_lab[32];
26959 rtx xops[2];
26961 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
26963 /* Loop. */
26964 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
26966 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
26967 xops[0] = reg1;
26968 xops[1] = GEN_INT (-PROBE_INTERVAL);
26969 output_asm_insn ("addi %0,%0,%1", xops);
26971 /* Probe at TEST_ADDR. */
26972 xops[1] = gen_rtx_REG (Pmode, 0);
26973 output_asm_insn ("stw %1,0(%0)", xops);
26975 /* Test if TEST_ADDR == LAST_ADDR. */
26976 xops[1] = reg2;
26977 if (TARGET_64BIT)
26978 output_asm_insn ("cmpd 0,%0,%1", xops);
26979 else
26980 output_asm_insn ("cmpw 0,%0,%1", xops);
26982 /* Branch. */
26983 fputs ("\tbne 0,", asm_out_file);
26984 assemble_name_raw (asm_out_file, loop_lab);
26985 fputc ('\n', asm_out_file);
26987 return "";
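/* Editorial sketch of the emitted loop, assuming the default
   4096-byte PROBE_INTERVAL (STACK_CHECK_PROBE_INTERVAL_EXP == 12),
   a 32-bit ELF target, and the r12/r0 register pairing chosen by
   rs6000_emit_probe_stack_range above:

       .LPSRL0:
	       addi 12,12,-4096
	       stw 0,0(12)
	       cmpw 0,12,0
	       bne 0,.LPSRL0
*/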
26990 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26991 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26992 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26993 deduce these equivalences by itself so it wasn't necessary to hold
26994 its hand so much. Don't be tempted to always supply d2_f_d_e with
26995 the actual cfa register, i.e. r31 when we are using a hard frame
26996 pointer. That fails when saving regs off r1, and sched moves the
26997 r31 setup past the reg saves. */
26999 static rtx
27000 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
27001 rtx reg2, rtx repl2)
27003 rtx repl;
27005 if (REGNO (reg) == STACK_POINTER_REGNUM)
27007 gcc_checking_assert (val == 0);
27008 repl = NULL_RTX;
27010 else
27011 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27012 GEN_INT (val));
27014 rtx pat = PATTERN (insn);
27015 if (!repl && !reg2)
27017 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
27018 if (GET_CODE (pat) == PARALLEL)
27019 for (int i = 0; i < XVECLEN (pat, 0); i++)
27020 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27022 rtx set = XVECEXP (pat, 0, i);
27024 /* If this PARALLEL has been emitted for out-of-line
27025 register save functions, or store multiple, then omit
27026 eh_frame info for any user-defined global regs. If
27027 eh_frame info is supplied, frame unwinding will
27028 restore a user reg. */
27029 if (!REG_P (SET_SRC (set))
27030 || !fixed_reg_p (REGNO (SET_SRC (set))))
27031 RTX_FRAME_RELATED_P (set) = 1;
27033 RTX_FRAME_RELATED_P (insn) = 1;
27034 return insn;
27037 /* We expect that 'pat' is either a SET or a PARALLEL containing
27038 SETs (and possibly other stuff). In a PARALLEL, all the SETs
27039 are important so they all have to be marked RTX_FRAME_RELATED_P.
27040 Call simplify_replace_rtx on the SETs rather than the whole insn
27041 so as to leave the other stuff alone (for example USE of r12). */
27043 if (GET_CODE (pat) == SET)
27045 if (repl)
27046 pat = simplify_replace_rtx (pat, reg, repl);
27047 if (reg2)
27048 pat = simplify_replace_rtx (pat, reg2, repl2);
27050 else if (GET_CODE (pat) == PARALLEL)
27052 pat = shallow_copy_rtx (pat);
27053 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
27055 for (int i = 0; i < XVECLEN (pat, 0); i++)
27056 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27058 rtx set = XVECEXP (pat, 0, i);
27060 if (repl)
27061 set = simplify_replace_rtx (set, reg, repl);
27062 if (reg2)
27063 set = simplify_replace_rtx (set, reg2, repl2);
27064 XVECEXP (pat, 0, i) = set;
27066 /* Omit eh_frame info for any user-defined global regs. */
27067 if (!REG_P (SET_SRC (set))
27068 || !fixed_reg_p (REGNO (SET_SRC (set))))
27069 RTX_FRAME_RELATED_P (set) = 1;
27072 else
27073 gcc_unreachable ();
27075 RTX_FRAME_RELATED_P (insn) = 1;
27076 if (repl || reg2)
27077 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
27079 return insn;
27082 /* Returns an insn that has a vrsave set operation with the
27083 appropriate CLOBBERs. */
27085 static rtx
27086 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
27088 int nclobs, i;
27089 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
27090 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27092 clobs[0]
27093 = gen_rtx_SET (vrsave,
27094 gen_rtx_UNSPEC_VOLATILE (SImode,
27095 gen_rtvec (2, reg, vrsave),
27096 UNSPECV_SET_VRSAVE));
27098 nclobs = 1;
27100 /* We need to clobber the registers in the mask so the scheduler
27101 does not move sets to VRSAVE before sets of AltiVec registers.
27103 However, if the function receives nonlocal gotos, reload will set
27104 all call saved registers live. We will end up with:
27106 (set (reg 999) (mem))
27107 (parallel [ (set (reg vrsave) (unspec blah))
27108 (clobber (reg 999))])
27110 The clobber will cause the store into reg 999 to be dead, and
27111 flow will attempt to delete an epilogue insn. In this case, we
27112 need an unspec use/set of the register. */
27114 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27115 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27117 if (!epiloguep || call_used_regs [i])
27118 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
27119 gen_rtx_REG (V4SImode, i));
27120 else
27122 rtx reg = gen_rtx_REG (V4SImode, i);
27124 clobs[nclobs++]
27125 = gen_rtx_SET (reg,
27126 gen_rtx_UNSPEC (V4SImode,
27127 gen_rtvec (1, reg), 27));
27131 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
27133 for (i = 0; i < nclobs; ++i)
27134 XVECEXP (insn, 0, i) = clobs[i];
27136 return insn;
27139 static rtx
27140 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
27142 rtx addr, mem;
27144 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
27145 mem = gen_frame_mem (GET_MODE (reg), addr);
27146 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
27149 static rtx
27150 gen_frame_load (rtx reg, rtx frame_reg, int offset)
27152 return gen_frame_set (reg, frame_reg, offset, false);
27155 static rtx
27156 gen_frame_store (rtx reg, rtx frame_reg, int offset)
27158 return gen_frame_set (reg, frame_reg, offset, true);
27161 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
27162 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
27164 static rtx
27165 emit_frame_save (rtx frame_reg, machine_mode mode,
27166 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
27168 rtx reg, insn;
27170 /* Assert that none of the cases needing register indexed addressing apply here. */
27171 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
27172 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
27173 || (TARGET_E500_DOUBLE && mode == DFmode)
27174 || (TARGET_SPE_ABI
27175 && SPE_VECTOR_MODE (mode)
27176 && !SPE_CONST_OFFSET_OK (offset))));
27178 reg = gen_rtx_REG (mode, regno);
27179 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
27180 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
27181 NULL_RTX, NULL_RTX);
27184 /* Emit an offset memory reference suitable for a frame store, while
27185 converting to a valid addressing mode. */
27187 static rtx
27188 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
27190 rtx int_rtx, offset_rtx;
27192 int_rtx = GEN_INT (offset);
27194 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
27195 || (TARGET_E500_DOUBLE && mode == DFmode))
27197 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
27198 emit_move_insn (offset_rtx, int_rtx);
27200 else
27201 offset_rtx = int_rtx;
27203 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
27206 #ifndef TARGET_FIX_AND_CONTINUE
27207 #define TARGET_FIX_AND_CONTINUE 0
27208 #endif
27210 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
27211 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
27212 #define LAST_SAVRES_REGISTER 31
27213 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
27215 enum {
27216 SAVRES_LR = 0x1,
27217 SAVRES_SAVE = 0x2,
27218 SAVRES_REG = 0x0c,
27219 SAVRES_GPR = 0,
27220 SAVRES_FPR = 4,
27221 SAVRES_VR = 8
27224 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
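/* The second dimension (12) covers every SEL combination: one of
   SAVRES_GPR/FPR/VR, optionally OR'd with SAVRES_SAVE and SAVRES_LR;
   e.g. SAVRES_SAVE | SAVRES_FPR | SAVRES_LR == 0x7 selects the
   routine that saves FPRs and also handles LR.  */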
27226 /* Temporary holding space for an out-of-line register save/restore
27227 routine name. */
27228 static char savres_routine_name[30];
27230 /* Return the name for an out-of-line register save/restore routine.
27231 SEL selects save vs. restore, GPRs vs. FPRs vs. VRs, and whether LR is handled. */
27233 static char *
27234 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
27236 const char *prefix = "";
27237 const char *suffix = "";
27239 /* Different targets are supposed to define
27240 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
27241 routine name could be defined with:
27243 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
27245 This is a nice idea in theory, but in reality things are
27246 complicated in several ways:
27248 - ELF targets have save/restore routines for GPRs.
27250 - SPE targets use different prefixes for 32/64-bit registers, and
27251 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
27253 - PPC64 ELF targets have routines for save/restore of GPRs that
27254 differ in what they do with the link register, so having a set
27255 prefix doesn't work. (We only use one of the save routines at
27256 the moment, though.)
27258 - PPC32 ELF targets have "exit" versions of the restore routines
27259 that restore the link register and can save some extra space.
27260 These require an extra suffix. (There are also "tail" versions
27261 of the restore routines and "GOT" versions of the save routines,
27262 but we don't generate those at present. Same problems apply,
27263 though.)
27265 We deal with all this by synthesizing our own prefix/suffix and
27266 using that for the simple sprintf call shown above. */
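/* Concretely, the scheme below yields names such as "_savegpr_29"
   for the 32-bit ELF GPR save starting at r29, "_restgpr_29_x" for
   the "exit" restore that also reloads the link register, and
   "_savegpr0_29" / "_restgpr1_29" for the AIX-style routines, where
   the 0/1 encodes whether the routine handles LR.  */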
27267 if (TARGET_SPE)
27269 /* No floating point saves on the SPE. */
27270 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
27272 if ((sel & SAVRES_SAVE))
27273 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
27274 else
27275 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
27277 if ((sel & SAVRES_LR))
27278 suffix = "_x";
27280 else if (DEFAULT_ABI == ABI_V4)
27282 if (TARGET_64BIT)
27283 goto aix_names;
27285 if ((sel & SAVRES_REG) == SAVRES_GPR)
27286 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
27287 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27288 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
27289 else if ((sel & SAVRES_REG) == SAVRES_VR)
27290 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27291 else
27292 abort ();
27294 if ((sel & SAVRES_LR))
27295 suffix = "_x";
27297 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27299 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
27300 /* No out-of-line save/restore routines for GPRs on AIX. */
27301 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
27302 #endif
27304 aix_names:
27305 if ((sel & SAVRES_REG) == SAVRES_GPR)
27306 prefix = ((sel & SAVRES_SAVE)
27307 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
27308 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
27309 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27311 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27312 if ((sel & SAVRES_LR))
27313 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
27314 else
27315 #endif
27317 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
27318 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
27321 else if ((sel & SAVRES_REG) == SAVRES_VR)
27322 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27323 else
27324 abort ();
27327 if (DEFAULT_ABI == ABI_DARWIN)
27329 /* The Darwin approach is (slightly) different, in order to be
27330 compatible with code generated by the system toolchain. There is a
27331 single symbol for the start of the save sequence, and the code here
27332 computes an offset into that sequence based on the first register
27333 to be saved. */
27334 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
27335 if ((sel & SAVRES_REG) == SAVRES_GPR)
27336 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
27337 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
27338 (regno - 13) * 4, prefix, regno);
27339 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27340 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
27341 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
27342 else if ((sel & SAVRES_REG) == SAVRES_VR)
27343 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
27344 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
27345 else
27346 abort ();
27348 else
27349 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
27351 return savres_routine_name;
27354 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
27355 SEL selects the register class and whether we save or restore. */
27357 static rtx
27358 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
27360 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
27361 ? info->first_gp_reg_save
27362 : (sel & SAVRES_REG) == SAVRES_FPR
27363 ? info->first_fp_reg_save - 32
27364 : (sel & SAVRES_REG) == SAVRES_VR
27365 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
27366 : -1);
27367 rtx sym;
27368 int select = sel;
27370 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
27371 versions of the gpr routines. */
27372 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
27373 && info->spe_64bit_regs_used)
27374 select ^= SAVRES_FPR ^ SAVRES_GPR;
27376 /* Don't generate bogus routine names. */
27377 gcc_assert (FIRST_SAVRES_REGISTER <= regno
27378 && regno <= LAST_SAVRES_REGISTER
27379 && select >= 0 && select <= 12);
27381 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
27383 if (sym == NULL)
27385 char *name;
27387 name = rs6000_savres_routine_name (info, regno, sel);
27389 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
27390 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
27391 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
27394 return sym;
27397 /* Emit a sequence of insns, including a stack tie if needed, for
27398 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
27399 reset the stack pointer, but move the base of the frame into
27400 reg UPDT_REGNO for use by out-of-line register restore routines. */
27402 static rtx
27403 rs6000_emit_stack_reset (rs6000_stack_t *info,
27404 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
27405 unsigned updt_regno)
27407 rtx updt_reg_rtx;
27409 /* This blockage is needed so that sched doesn't decide to move
27410 the sp change before the register restores. */
27411 if (DEFAULT_ABI == ABI_V4
27412 || (TARGET_SPE_ABI
27413 && info->spe_64bit_regs_used != 0
27414 && info->first_gp_reg_save != 32))
27415 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
27417 /* If we are restoring registers out-of-line, we will be using the
27418 "exit" variants of the restore routines, which will reset the
27419 stack for us. But we do need to point updt_reg into the
27420 right place for those routines. */
27421 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
27423 if (frame_off != 0)
27424 return emit_insn (gen_add3_insn (updt_reg_rtx,
27425 frame_reg_rtx, GEN_INT (frame_off)));
27426 else if (REGNO (frame_reg_rtx) != updt_regno)
27427 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
27429 return NULL_RTX;
27432 /* Return the register number used as a pointer by out-of-line
27433 save/restore functions. */
27435 static inline unsigned
27436 ptr_regno_for_savres (int sel)
27438 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27439 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
27440 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
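/* In tabular form, the pointer register chosen above is:
     AIX/ELFv2: r1 for FPR routines and for any LR ("_x") variant,
                r12 otherwise;
     Darwin:    r1 for FPR routines, r11 otherwise;
     V.4:       r11 for everything.
   These are the registers through which the out-of-line routines in
   libgcc expect to address the save area.  */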
27443 /* Construct a parallel rtx describing the effect of a call to an
27444 out-of-line register save/restore routine, and emit the insn
27445 or jump_insn as appropriate. */
27447 static rtx
27448 rs6000_emit_savres_rtx (rs6000_stack_t *info,
27449 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
27450 machine_mode reg_mode, int sel)
27452 int i;
27453 int offset, start_reg, end_reg, n_regs, use_reg;
27454 int reg_size = GET_MODE_SIZE (reg_mode);
27455 rtx sym;
27456 rtvec p;
27457 rtx par, insn;
27459 offset = 0;
27460 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27461 ? info->first_gp_reg_save
27462 : (sel & SAVRES_REG) == SAVRES_FPR
27463 ? info->first_fp_reg_save
27464 : (sel & SAVRES_REG) == SAVRES_VR
27465 ? info->first_altivec_reg_save
27466 : -1);
27467 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27468 ? 32
27469 : (sel & SAVRES_REG) == SAVRES_FPR
27470 ? 64
27471 : (sel & SAVRES_REG) == SAVRES_VR
27472 ? LAST_ALTIVEC_REGNO + 1
27473 : -1);
27474 n_regs = end_reg - start_reg;
27475 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
27476 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
27477 + n_regs);
27479 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27480 RTVEC_ELT (p, offset++) = ret_rtx;
27482 RTVEC_ELT (p, offset++)
27483 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27485 sym = rs6000_savres_routine_sym (info, sel);
27486 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
27488 use_reg = ptr_regno_for_savres (sel);
27489 if ((sel & SAVRES_REG) == SAVRES_VR)
27491 /* Vector regs are saved/restored using [reg+reg] addressing. */
27492 RTVEC_ELT (p, offset++)
27493 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27494 RTVEC_ELT (p, offset++)
27495 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
27497 else
27498 RTVEC_ELT (p, offset++)
27499 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27501 for (i = 0; i < end_reg - start_reg; i++)
27502 RTVEC_ELT (p, i + offset)
27503 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
27504 frame_reg_rtx, save_area_offset + reg_size * i,
27505 (sel & SAVRES_SAVE) != 0);
27507 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27508 RTVEC_ELT (p, i + offset)
27509 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
27511 par = gen_rtx_PARALLEL (VOIDmode, p);
27513 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27515 insn = emit_jump_insn (par);
27516 JUMP_LABEL (insn) = ret_rtx;
27518 else
27519 insn = emit_insn (par);
27520 return insn;
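/* The PARALLEL built above contains, in order: an optional (return)
   for the "exit" restore variants, a clobber of LR (the routine is
   reached via bl), a use of the routine's SYMBOL_REF, a use of the
   pointer register (for vector routines, a clobber of it plus a use
   of r0), one frame load/store per register handled, and optionally
   a store of r0 holding LR.  Describing the whole out-of-line call
   as one insn lets rs6000_frame_related attach CFI to it at once.  */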
27523 /* Emit code to store CR fields that need to be saved into REG. */
27525 static void
27526 rs6000_emit_move_from_cr (rtx reg)
27528 /* Only the ELFv2 ABI allows storing a subset of the CR fields. */
27529 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
27531 int i, cr_reg[8], count = 0;
27533 /* Collect CR fields that must be saved. */
27534 for (i = 0; i < 8; i++)
27535 if (save_reg_p (CR0_REGNO + i))
27536 cr_reg[count++] = i;
27538 /* If it's just a single one, use mfcrf. */
27539 if (count == 1)
27541 rtvec p = rtvec_alloc (1);
27542 rtvec r = rtvec_alloc (2);
27543 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
27544 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
27545 RTVEC_ELT (p, 0)
27546 = gen_rtx_SET (reg,
27547 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
27549 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27550 return;
27553 /* ??? It might be better to handle the count == 2 and count == 3 cases here
27554 as well, using logical operations to combine the values. */
27557 emit_insn (gen_movesi_from_cr (reg));
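/* A single-field save thus becomes one mfocrf/mfcrf instruction,
   e.g. "mfocrf rN,0x20" to copy just CR2, since the FXM mask bit for
   field I is 1 << (7 - I) as constructed above.  The fallback
   gen_movesi_from_cr copies the entire CR with mfcr.  */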
27560 /* Return whether the split-stack arg pointer (r12) is used. */
27562 static bool
27563 split_stack_arg_pointer_used_p (void)
27565 /* If the pseudo holding the arg pointer is no longer a pseudo,
27566 then the arg pointer is used. */
27567 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
27568 && (!REG_P (cfun->machine->split_stack_arg_pointer)
27569 || (REGNO (cfun->machine->split_stack_arg_pointer)
27570 < FIRST_PSEUDO_REGISTER)))
27571 return true;
27573 /* Unfortunately we also need to do some code scanning, since
27574 r12 may have been substituted for the pseudo. */
27575 rtx_insn *insn;
27576 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
27577 FOR_BB_INSNS (bb, insn)
27578 if (NONDEBUG_INSN_P (insn))
27580 /* A call destroys r12. */
27581 if (CALL_P (insn))
27582 return false;
27584 df_ref use;
27585 FOR_EACH_INSN_USE (use, insn)
27587 rtx x = DF_REF_REG (use);
27588 if (REG_P (x) && REGNO (x) == 12)
27589 return true;
27591 df_ref def;
27592 FOR_EACH_INSN_DEF (def, insn)
27594 rtx x = DF_REF_REG (def);
27595 if (REG_P (x) && REGNO (x) == 12)
27596 return false;
27599 return bitmap_bit_p (DF_LR_OUT (bb), 12);
27602 /* Return whether we need to emit an ELFv2 global entry point prologue. */
27604 static bool
27605 rs6000_global_entry_point_needed_p (void)
27607 /* Only needed for the ELFv2 ABI. */
27608 if (DEFAULT_ABI != ABI_ELFv2)
27609 return false;
27611 /* With -msingle-pic-base, we assume the whole program shares the same
27612 TOC, so no global entry point prologues are needed anywhere. */
27613 if (TARGET_SINGLE_PIC_BASE)
27614 return false;
27616 /* Ensure we have a global entry point for thunks. ??? We could
27617 avoid that if the target routine doesn't need a global entry point,
27618 but we do not know whether this is the case at this point. */
27619 if (cfun->is_thunk)
27620 return true;
27622 /* For regular functions, rs6000_emit_prologue sets this flag if the
27623 routine ever uses the TOC pointer. */
27624 return cfun->machine->r2_setup_needed;
27627 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
27628 static sbitmap
27629 rs6000_get_separate_components (void)
27631 rs6000_stack_t *info = rs6000_stack_info ();
27633 if (WORLD_SAVE_P (info))
27634 return NULL;
27636 sbitmap components = sbitmap_alloc (32);
27637 bitmap_clear (components);
27639 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
27640 && !(info->savres_strategy & REST_MULTIPLE));
27642 /* The GPRs we need saved to the frame. */
27643 if ((info->savres_strategy & SAVE_INLINE_GPRS)
27644 && (info->savres_strategy & REST_INLINE_GPRS))
27646 int reg_size = TARGET_32BIT ? 4 : 8;
27647 int offset = info->gp_save_offset;
27648 if (info->push_p)
27649 offset += info->total_size;
27651 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27653 if (IN_RANGE (offset, -0x8000, 0x7fff)
27654 && rs6000_reg_live_or_pic_offset_p (regno))
27655 bitmap_set_bit (components, regno);
27657 offset += reg_size;
27661 /* Don't mess with the hard frame pointer. */
27662 if (frame_pointer_needed)
27663 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
27665 /* Don't mess with the fixed TOC register. */
27666 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
27667 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27668 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27669 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
27671 /* Optimize LR save and restore if we can. This is component 0. Any
27672 out-of-line register save/restore routines need LR. */
27673 if (info->lr_save_p
27674 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27675 && (info->savres_strategy & SAVE_INLINE_GPRS)
27676 && (info->savres_strategy & REST_INLINE_GPRS)
27677 && (info->savres_strategy & SAVE_INLINE_FPRS)
27678 && (info->savres_strategy & REST_INLINE_FPRS)
27679 && (info->savres_strategy & SAVE_INLINE_VRS)
27680 && (info->savres_strategy & REST_INLINE_VRS))
27682 int offset = info->lr_save_offset;
27683 if (info->push_p)
27684 offset += info->total_size;
27685 if (IN_RANGE (offset, -0x8000, 0x7fff))
27686 bitmap_set_bit (components, 0);
27689 return components;
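/* Component numbering used by all the shrink-wrap hooks below: bit N
   stands for the save/restore of GPR N, and bit 0, which can never be
   a saved GPR, is reused to stand for the LR save.  */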
27692 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
27693 static sbitmap
27694 rs6000_components_for_bb (basic_block bb)
27696 rs6000_stack_t *info = rs6000_stack_info ();
27698 bitmap in = DF_LIVE_IN (bb);
27699 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
27700 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
27702 sbitmap components = sbitmap_alloc (32);
27703 bitmap_clear (components);
27705 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
27706 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27707 if (bitmap_bit_p (in, regno)
27708 || bitmap_bit_p (gen, regno)
27709 || bitmap_bit_p (kill, regno))
27710 bitmap_set_bit (components, regno);
27712 /* LR needs to be saved around a bb if it is killed in that bb. */
27713 if (bitmap_bit_p (gen, LR_REGNO)
27714 || bitmap_bit_p (kill, LR_REGNO))
27715 bitmap_set_bit (components, 0);
27717 return components;
27720 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
27721 static void
27722 rs6000_disqualify_components (sbitmap components, edge e,
27723 sbitmap edge_components, bool /*is_prologue*/)
27725 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
27726 live where we want to place that code. */
27727 if (bitmap_bit_p (edge_components, 0)
27728 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
27730 if (dump_file)
27731 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
27732 "on entry to bb %d\n", e->dest->index);
27733 bitmap_clear_bit (components, 0);
27737 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
27738 static void
27739 rs6000_emit_prologue_components (sbitmap components)
27741 rs6000_stack_t *info = rs6000_stack_info ();
27742 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27743 ? HARD_FRAME_POINTER_REGNUM
27744 : STACK_POINTER_REGNUM);
27745 int reg_size = TARGET_32BIT ? 4 : 8;
27747 /* Prologue for LR. */
27748 if (bitmap_bit_p (components, 0))
27750 rtx reg = gen_rtx_REG (Pmode, 0);
27751 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27752 RTX_FRAME_RELATED_P (insn) = 1;
27753 add_reg_note (insn, REG_CFA_REGISTER, NULL);
27755 int offset = info->lr_save_offset;
27756 if (info->push_p)
27757 offset += info->total_size;
27759 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27760 RTX_FRAME_RELATED_P (insn) = 1;
27761 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27762 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
27763 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
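/* The LR save above is a two-insn sequence, mflr into r0 followed by
   a store of r0, so the CFI is split to match: REG_CFA_REGISTER on
   the move records that LR temporarily lives in r0, and the
   REG_CFA_OFFSET on the store records its final frame slot.  */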
27766 /* Prologue for the GPRs. */
27767 int offset = info->gp_save_offset;
27768 if (info->push_p)
27769 offset += info->total_size;
27771 for (int i = info->first_gp_reg_save; i < 32; i++)
27773 if (bitmap_bit_p (components, i))
27775 rtx reg = gen_rtx_REG (Pmode, i);
27776 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27777 RTX_FRAME_RELATED_P (insn) = 1;
27778 rtx set = copy_rtx (single_set (insn));
27779 add_reg_note (insn, REG_CFA_OFFSET, set);
27782 offset += reg_size;
27786 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
27787 static void
27788 rs6000_emit_epilogue_components (sbitmap components)
27790 rs6000_stack_t *info = rs6000_stack_info ();
27791 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27792 ? HARD_FRAME_POINTER_REGNUM
27793 : STACK_POINTER_REGNUM);
27794 int reg_size = TARGET_32BIT ? 4 : 8;
27796 /* Epilogue for the GPRs. */
27797 int offset = info->gp_save_offset;
27798 if (info->push_p)
27799 offset += info->total_size;
27801 for (int i = info->first_gp_reg_save; i < 32; i++)
27803 if (bitmap_bit_p (components, i))
27805 rtx reg = gen_rtx_REG (Pmode, i);
27806 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27807 RTX_FRAME_RELATED_P (insn) = 1;
27808 add_reg_note (insn, REG_CFA_RESTORE, reg);
27811 offset += reg_size;
27814 /* Epilogue for LR. */
27815 if (bitmap_bit_p (components, 0))
27817 int offset = info->lr_save_offset;
27818 if (info->push_p)
27819 offset += info->total_size;
27821 rtx reg = gen_rtx_REG (Pmode, 0);
27822 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27824 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27825 insn = emit_move_insn (lr, reg);
27826 RTX_FRAME_RELATED_P (insn) = 1;
27827 add_reg_note (insn, REG_CFA_RESTORE, lr);
27831 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
27832 static void
27833 rs6000_set_handled_components (sbitmap components)
27835 rs6000_stack_t *info = rs6000_stack_info ();
27837 for (int i = info->first_gp_reg_save; i < 32; i++)
27838 if (bitmap_bit_p (components, i))
27839 cfun->machine->gpr_is_wrapped_separately[i] = true;
27841 if (bitmap_bit_p (components, 0))
27842 cfun->machine->lr_is_wrapped_separately = true;
27845 /* Emit function prologue as insns. */
27847 void
27848 rs6000_emit_prologue (void)
27850 rs6000_stack_t *info = rs6000_stack_info ();
27851 machine_mode reg_mode = Pmode;
27852 int reg_size = TARGET_32BIT ? 4 : 8;
27853 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27854 rtx frame_reg_rtx = sp_reg_rtx;
27855 unsigned int cr_save_regno;
27856 rtx cr_save_rtx = NULL_RTX;
27857 rtx insn;
27858 int strategy;
27859 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27860 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27861 && call_used_regs[STATIC_CHAIN_REGNUM]);
27862 int using_split_stack = (flag_split_stack
27863 && (lookup_attribute ("no_split_stack",
27864 DECL_ATTRIBUTES (cfun->decl))
27865 == NULL));
27867 /* Offset to top of frame for frame_reg and sp respectively. */
27868 HOST_WIDE_INT frame_off = 0;
27869 HOST_WIDE_INT sp_off = 0;
27870 /* sp_adjust is the stack adjusting instruction, tracked so that the
27871 insn setting up the split-stack arg pointer can be emitted just
27872 prior to it, when r12 is not used here for other purposes. */
27873 rtx_insn *sp_adjust = 0;
27875 #if CHECKING_P
27876 /* Track and check usage of r0, r11, r12. */
27877 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27878 #define START_USE(R) do \
27880 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27881 reg_inuse |= 1 << (R); \
27882 } while (0)
27883 #define END_USE(R) do \
27885 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27886 reg_inuse &= ~(1 << (R)); \
27887 } while (0)
27888 #define NOT_INUSE(R) do \
27890 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27891 } while (0)
27892 #else
27893 #define START_USE(R) do {} while (0)
27894 #define END_USE(R) do {} while (0)
27895 #define NOT_INUSE(R) do {} while (0)
27896 #endif
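/* With checking enabled, a prologue path that borrows a scratch
   register brackets its uses, e.g.
     START_USE (0);
     ... emit insns clobbering r0 ...
     END_USE (0);
   so any overlapping claim on r0, r11 or r12 trips the assertions
   above.  */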
27898 if (DEFAULT_ABI == ABI_ELFv2
27899 && !TARGET_SINGLE_PIC_BASE)
27901 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27903 /* With -mminimal-toc we may generate an extra use of r2 below. */
27904 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27905 cfun->machine->r2_setup_needed = true;
27909 if (flag_stack_usage_info)
27910 current_function_static_stack_size = info->total_size;
27912 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27914 HOST_WIDE_INT size = info->total_size;
27916 if (crtl->is_leaf && !cfun->calls_alloca)
27918 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27919 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
27920 size - STACK_CHECK_PROTECT);
27922 else if (size > 0)
27923 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
27926 if (TARGET_FIX_AND_CONTINUE)
27928 /* GDB on Darwin arranges to forward a function from the old
27929 address by modifying the first 5 instructions of the function
27930 to branch to the overriding function. This is necessary to
27931 permit function pointers that point to the old function to
27932 actually forward to the new function. */
27933 emit_insn (gen_nop ());
27934 emit_insn (gen_nop ());
27935 emit_insn (gen_nop ());
27936 emit_insn (gen_nop ());
27937 emit_insn (gen_nop ());
27940 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27942 reg_mode = V2SImode;
27943 reg_size = 8;
27946 /* Handle world saves specially here. */
27947 if (WORLD_SAVE_P (info))
27949 int i, j, sz;
27950 rtx treg;
27951 rtvec p;
27952 rtx reg0;
27954 /* save_world expects lr in r0. */
27955 reg0 = gen_rtx_REG (Pmode, 0);
27956 if (info->lr_save_p)
27958 insn = emit_move_insn (reg0,
27959 gen_rtx_REG (Pmode, LR_REGNO));
27960 RTX_FRAME_RELATED_P (insn) = 1;
27963 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27964 assumptions about the offsets of various bits of the stack
27965 frame. */
27966 gcc_assert (info->gp_save_offset == -220
27967 && info->fp_save_offset == -144
27968 && info->lr_save_offset == 8
27969 && info->cr_save_offset == 4
27970 && info->push_p
27971 && info->lr_save_p
27972 && (!crtl->calls_eh_return
27973 || info->ehrd_offset == -432)
27974 && info->vrsave_save_offset == -224
27975 && info->altivec_save_offset == -416);
27977 treg = gen_rtx_REG (SImode, 11);
27978 emit_move_insn (treg, GEN_INT (-info->total_size));
27980 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27981 in R11. It also clobbers R12, so beware! */
27983 /* Preserve CR2 for save_world prologues. */
27984 sz = 5;
27985 sz += 32 - info->first_gp_reg_save;
27986 sz += 64 - info->first_fp_reg_save;
27987 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27988 p = rtvec_alloc (sz);
27989 j = 0;
27990 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27991 gen_rtx_REG (SImode,
27992 LR_REGNO));
27993 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27994 gen_rtx_SYMBOL_REF (Pmode,
27995 "*save_world"));
27996 /* We do floats first so that the instruction pattern matches
27997 properly. */
27998 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27999 RTVEC_ELT (p, j++)
28000 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28001 ? DFmode : SFmode,
28002 info->first_fp_reg_save + i),
28003 frame_reg_rtx,
28004 info->fp_save_offset + frame_off + 8 * i);
28005 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28006 RTVEC_ELT (p, j++)
28007 = gen_frame_store (gen_rtx_REG (V4SImode,
28008 info->first_altivec_reg_save + i),
28009 frame_reg_rtx,
28010 info->altivec_save_offset + frame_off + 16 * i);
28011 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28012 RTVEC_ELT (p, j++)
28013 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28014 frame_reg_rtx,
28015 info->gp_save_offset + frame_off + reg_size * i);
28017 /* CR register traditionally saved as CR2. */
28018 RTVEC_ELT (p, j++)
28019 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
28020 frame_reg_rtx, info->cr_save_offset + frame_off);
28021 /* Explain the use of R0. */
28022 if (info->lr_save_p)
28023 RTVEC_ELT (p, j++)
28024 = gen_frame_store (reg0,
28025 frame_reg_rtx, info->lr_save_offset + frame_off);
28026 /* Explain what happens to the stack pointer. */
28028 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
28029 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
28032 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28033 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28034 treg, GEN_INT (-info->total_size));
28035 sp_off = frame_off = info->total_size;
28038 strategy = info->savres_strategy;
28040 /* For V.4, update stack before we do any saving and set back pointer. */
28041 if (! WORLD_SAVE_P (info)
28042 && info->push_p
28043 && (DEFAULT_ABI == ABI_V4
28044 || crtl->calls_eh_return))
28046 bool need_r11 = (TARGET_SPE
28047 ? (!(strategy & SAVE_INLINE_GPRS)
28048 && info->spe_64bit_regs_used == 0)
28049 : (!(strategy & SAVE_INLINE_FPRS)
28050 || !(strategy & SAVE_INLINE_GPRS)
28051 || !(strategy & SAVE_INLINE_VRS)));
28052 int ptr_regno = -1;
28053 rtx ptr_reg = NULL_RTX;
28054 int ptr_off = 0;
28056 if (info->total_size < 32767)
28057 frame_off = info->total_size;
28058 else if (need_r11)
28059 ptr_regno = 11;
28060 else if (info->cr_save_p
28061 || info->lr_save_p
28062 || info->first_fp_reg_save < 64
28063 || info->first_gp_reg_save < 32
28064 || info->altivec_size != 0
28065 || info->vrsave_size != 0
28066 || crtl->calls_eh_return)
28067 ptr_regno = 12;
28068 else
28070 /* The prologue won't be saving any regs so there is no need
28071 to set up a frame register to access any frame save area.
28072 We also won't be using frame_off anywhere below, but set
28073 the correct value anyway to protect against future
28074 changes to this function. */
28075 frame_off = info->total_size;
28077 if (ptr_regno != -1)
28079 /* Set up the frame offset to that needed by the first
28080 out-of-line save function. */
28081 START_USE (ptr_regno);
28082 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28083 frame_reg_rtx = ptr_reg;
28084 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
28085 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
28086 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
28087 ptr_off = info->gp_save_offset + info->gp_size;
28088 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
28089 ptr_off = info->altivec_save_offset + info->altivec_size;
28090 frame_off = -ptr_off;
28092 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28093 ptr_reg, ptr_off);
28094 if (REGNO (frame_reg_rtx) == 12)
28095 sp_adjust = 0;
28096 sp_off = info->total_size;
28097 if (frame_reg_rtx != sp_reg_rtx)
28098 rs6000_emit_stack_tie (frame_reg_rtx, false);
28101 /* If we need to save the link register, get it into r0. */
28102 if (!WORLD_SAVE_P (info) && info->lr_save_p
28103 && !cfun->machine->lr_is_wrapped_separately)
28105 rtx addr, reg, mem;
28107 reg = gen_rtx_REG (Pmode, 0);
28108 START_USE (0);
28109 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
28110 RTX_FRAME_RELATED_P (insn) = 1;
28112 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
28113 | SAVE_NOINLINE_FPRS_SAVES_LR)))
28115 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28116 GEN_INT (info->lr_save_offset + frame_off));
28117 mem = gen_rtx_MEM (Pmode, addr);
28118 /* This should not be of rs6000_sr_alias_set, because of
28119 __builtin_return_address. */
28121 insn = emit_move_insn (mem, reg);
28122 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28123 NULL_RTX, NULL_RTX);
28124 END_USE (0);
28128 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
28129 r12 will be needed by the out-of-line gpr save. */
28130 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28131 && !(strategy & (SAVE_INLINE_GPRS
28132 | SAVE_NOINLINE_GPRS_SAVES_LR))
28133 ? 11 : 12);
28134 if (!WORLD_SAVE_P (info)
28135 && info->cr_save_p
28136 && REGNO (frame_reg_rtx) != cr_save_regno
28137 && !(using_static_chain_p && cr_save_regno == 11)
28138 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
28140 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
28141 START_USE (cr_save_regno);
28142 rs6000_emit_move_from_cr (cr_save_rtx);
28145 /* Do any required saving of FPRs. If there are only one or two to
28146 save, do it ourselves. Otherwise, call an out-of-line function. */
28147 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
28149 int i;
28150 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28151 if (save_reg_p (info->first_fp_reg_save + i))
28152 emit_frame_save (frame_reg_rtx,
28153 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28154 ? DFmode : SFmode),
28155 info->first_fp_reg_save + i,
28156 info->fp_save_offset + frame_off + 8 * i,
28157 sp_off - frame_off);
28159 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
28161 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28162 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28163 unsigned ptr_regno = ptr_regno_for_savres (sel);
28164 rtx ptr_reg = frame_reg_rtx;
28166 if (REGNO (frame_reg_rtx) == ptr_regno)
28167 gcc_checking_assert (frame_off == 0);
28168 else
28170 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28171 NOT_INUSE (ptr_regno);
28172 emit_insn (gen_add3_insn (ptr_reg,
28173 frame_reg_rtx, GEN_INT (frame_off)));
28175 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28176 info->fp_save_offset,
28177 info->lr_save_offset,
28178 DFmode, sel);
28179 rs6000_frame_related (insn, ptr_reg, sp_off,
28180 NULL_RTX, NULL_RTX);
28181 if (lr)
28182 END_USE (0);
28185 /* Save GPRs. This is done as a PARALLEL if we are using
28186 the store-multiple instructions. */
28187 if (!WORLD_SAVE_P (info)
28188 && TARGET_SPE_ABI
28189 && info->spe_64bit_regs_used != 0
28190 && info->first_gp_reg_save != 32)
28192 int i;
28193 rtx spe_save_area_ptr;
28194 HOST_WIDE_INT save_off;
28195 int ool_adjust = 0;
28197 /* Determine whether we can address all of the registers that need
28198 to be saved with an offset from frame_reg_rtx that fits in
28199 the small const field for SPE memory instructions. */
28200 int spe_regs_addressable
28201 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28202 + reg_size * (32 - info->first_gp_reg_save - 1))
28203 && (strategy & SAVE_INLINE_GPRS));
28205 if (spe_regs_addressable)
28207 spe_save_area_ptr = frame_reg_rtx;
28208 save_off = frame_off;
28210 else
28212 /* Make r11 point to the start of the SPE save area. We need
28213 to be careful here if r11 is holding the static chain. If
28214 it is, then temporarily save it in r0. */
28215 HOST_WIDE_INT offset;
28217 if (!(strategy & SAVE_INLINE_GPRS))
28218 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28219 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
28220 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
28221 save_off = frame_off - offset;
28223 if (using_static_chain_p)
28225 rtx r0 = gen_rtx_REG (Pmode, 0);
28227 START_USE (0);
28228 gcc_assert (info->first_gp_reg_save > 11);
28230 emit_move_insn (r0, spe_save_area_ptr);
28232 else if (REGNO (frame_reg_rtx) != 11)
28233 START_USE (11);
28235 emit_insn (gen_addsi3 (spe_save_area_ptr,
28236 frame_reg_rtx, GEN_INT (offset)));
28237 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
28238 frame_off = -info->spe_gp_save_offset + ool_adjust;
28241 if ((strategy & SAVE_INLINE_GPRS))
28243 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28244 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28245 emit_frame_save (spe_save_area_ptr, reg_mode,
28246 info->first_gp_reg_save + i,
28247 (info->spe_gp_save_offset + save_off
28248 + reg_size * i),
28249 sp_off - save_off);
28251 else
28253 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
28254 info->spe_gp_save_offset + save_off,
28255 0, reg_mode,
28256 SAVRES_SAVE | SAVRES_GPR);
28258 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
28259 NULL_RTX, NULL_RTX);
28262 /* Move the static chain pointer back. */
28263 if (!spe_regs_addressable)
28265 if (using_static_chain_p)
28267 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
28268 END_USE (0);
28270 else if (REGNO (frame_reg_rtx) != 11)
28271 END_USE (11);
28274 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
28276 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
28277 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
28278 unsigned ptr_regno = ptr_regno_for_savres (sel);
28279 rtx ptr_reg = frame_reg_rtx;
28280 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
28281 int end_save = info->gp_save_offset + info->gp_size;
28282 int ptr_off;
28284 if (ptr_regno == 12)
28285 sp_adjust = 0;
28286 if (!ptr_set_up)
28287 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28289 /* Need to adjust r11 (r12) if we saved any FPRs. */
28290 if (end_save + frame_off != 0)
28292 rtx offset = GEN_INT (end_save + frame_off);
28294 if (ptr_set_up)
28295 frame_off = -end_save;
28296 else
28297 NOT_INUSE (ptr_regno);
28298 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28300 else if (!ptr_set_up)
28302 NOT_INUSE (ptr_regno);
28303 emit_move_insn (ptr_reg, frame_reg_rtx);
28305 ptr_off = -end_save;
28306 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28307 info->gp_save_offset + ptr_off,
28308 info->lr_save_offset + ptr_off,
28309 reg_mode, sel);
28310 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
28311 NULL_RTX, NULL_RTX);
28312 if (lr)
28313 END_USE (0);
28315 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
28317 rtvec p;
28318 int i;
28319 p = rtvec_alloc (32 - info->first_gp_reg_save);
28320 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28321 RTVEC_ELT (p, i)
28322 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28323 frame_reg_rtx,
28324 info->gp_save_offset + frame_off + reg_size * i);
28325 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28326 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28327 NULL_RTX, NULL_RTX);
28329 else if (!WORLD_SAVE_P (info))
28331 int offset = info->gp_save_offset + frame_off;
28332 for (int i = info->first_gp_reg_save; i < 32; i++)
28334 if (rs6000_reg_live_or_pic_offset_p (i)
28335 && !cfun->machine->gpr_is_wrapped_separately[i])
28336 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
28337 sp_off - frame_off);
28339 offset += reg_size;
28343 if (crtl->calls_eh_return)
28345 unsigned int i;
28346 rtvec p;
28348 for (i = 0; ; ++i)
28350 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28351 if (regno == INVALID_REGNUM)
28352 break;
28355 p = rtvec_alloc (i);
28357 for (i = 0; ; ++i)
28359 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28360 if (regno == INVALID_REGNUM)
28361 break;
28363 insn
28364 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
28365 sp_reg_rtx,
28366 info->ehrd_offset + sp_off + reg_size * (int) i);
28367 RTVEC_ELT (p, i) = insn;
28368 RTX_FRAME_RELATED_P (insn) = 1;
28371 insn = emit_insn (gen_blockage ());
28372 RTX_FRAME_RELATED_P (insn) = 1;
28373 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
28376 /* In the AIX ABI we need to make sure r2 is really saved. */
28377 if (TARGET_AIX && crtl->calls_eh_return)
28379 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
28380 rtx save_insn, join_insn, note;
28381 long toc_restore_insn;
28383 tmp_reg = gen_rtx_REG (Pmode, 11);
28384 tmp_reg_si = gen_rtx_REG (SImode, 11);
28385 if (using_static_chain_p)
28387 START_USE (0);
28388 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
28390 else
28391 START_USE (11);
28392 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
28393 /* Peek at the instruction to which this function returns. If it's
28394 restoring r2, then we know we've already saved r2. We can't
28395 unconditionally save r2 because the value we have will already
28396 be updated if we arrived at this function via a plt call or
28397 toc adjusting stub. */
28398 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
28399 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
28400 + RS6000_TOC_SAVE_SLOT);
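/* The constant built above is the encoding of the expected TOC
   restore: 0xE841xxxx is "ld 2,N(1)" and 0x8041xxxx is "lwz 2,N(1)";
   e.g. with a TOC save slot of 24 the 64-bit value is 0xE8410018.
   The xor and compare below test whether the return address points
   at exactly that instruction.  */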
28401 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
28402 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
28403 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
28404 validate_condition_mode (EQ, CCUNSmode);
28405 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
28406 emit_insn (gen_rtx_SET (compare_result,
28407 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
28408 toc_save_done = gen_label_rtx ();
28409 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28410 gen_rtx_EQ (VOIDmode, compare_result,
28411 const0_rtx),
28412 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
28413 pc_rtx);
28414 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28415 JUMP_LABEL (jump) = toc_save_done;
28416 LABEL_NUSES (toc_save_done) += 1;
28418 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
28419 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
28420 sp_off - frame_off);
28422 emit_label (toc_save_done);
28424 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
28425 have a CFG that has different saves along different paths.
28426 Move the note to a dummy blockage insn, which describes that
28427 R2 is unconditionally saved after the label. */
28428 /* ??? An alternate representation might be a special insn pattern
28429 containing both the branch and the store. That might give the
28430 code that minimizes the number of DW_CFA_advance opcodes more
28431 freedom in placing the annotations. */
28432 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
28433 if (note)
28434 remove_note (save_insn, note);
28435 else
28436 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
28437 copy_rtx (PATTERN (save_insn)), NULL_RTX);
28438 RTX_FRAME_RELATED_P (save_insn) = 0;
28440 join_insn = emit_insn (gen_blockage ());
28441 REG_NOTES (join_insn) = note;
28442 RTX_FRAME_RELATED_P (join_insn) = 1;
28444 if (using_static_chain_p)
28446 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
28447 END_USE (0);
28449 else
28450 END_USE (11);
28453 /* Save CR if we use any CR fields that must be preserved. */
28454 if (!WORLD_SAVE_P (info) && info->cr_save_p)
28456 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28457 GEN_INT (info->cr_save_offset + frame_off));
28458 rtx mem = gen_frame_mem (SImode, addr);
28460 /* If we didn't copy cr before, do so now using r0. */
28461 if (cr_save_rtx == NULL_RTX)
28463 START_USE (0);
28464 cr_save_rtx = gen_rtx_REG (SImode, 0);
28465 rs6000_emit_move_from_cr (cr_save_rtx);
28468 /* Saving CR requires a two-instruction sequence: one instruction
28469 to move the CR to a general-purpose register, and a second
28470 instruction that stores the GPR to memory.
28472 We do not emit any DWARF CFI records for the first of these,
28473 because we cannot properly represent the fact that CR is saved in
28474 a register. One reason is that we cannot express that multiple
28475 CR fields are saved; another reason is that on 64-bit, the size
28476 of the CR register in DWARF (4 bytes) differs from the size of
28477 a general-purpose register.
28479 This means if any intervening instruction were to clobber one of
28480 the call-saved CR fields, we'd have incorrect CFI. To prevent
28481 this from happening, we mark the store to memory as a use of
28482 those CR fields, which prevents any such instruction from being
28483 scheduled in between the two instructions. */
28484 rtx crsave_v[9];
28485 int n_crsave = 0;
28486 int i;
28488 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
28489 for (i = 0; i < 8; i++)
28490 if (save_reg_p (CR0_REGNO + i))
28491 crsave_v[n_crsave++]
28492 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28494 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
28495 gen_rtvec_v (n_crsave, crsave_v)));
28496 END_USE (REGNO (cr_save_rtx));
28498 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
28499 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
28500 so we need to construct a frame expression manually. */
28501 RTX_FRAME_RELATED_P (insn) = 1;
28503 /* Update address to be stack-pointer relative, like
28504 rs6000_frame_related would do. */
28505 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28506 GEN_INT (info->cr_save_offset + sp_off));
28507 mem = gen_frame_mem (SImode, addr);
28509 if (DEFAULT_ABI == ABI_ELFv2)
28511 /* In the ELFv2 ABI we generate separate CFI records for each
28512 CR field that was actually saved. They all point to the
28513 same 32-bit stack slot. */
28514 rtx crframe[8];
28515 int n_crframe = 0;
28517 for (i = 0; i < 8; i++)
28518 if (save_reg_p (CR0_REGNO + i))
28520 crframe[n_crframe]
28521 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
28523 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
28524 n_crframe++;
28527 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28528 gen_rtx_PARALLEL (VOIDmode,
28529 gen_rtvec_v (n_crframe, crframe)));
28531 else
28533 /* In other ABIs, by convention, we use a single CR regnum to
28534 represent the fact that all call-saved CR fields are saved.
28535 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
28536 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
28537 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
28541 /* In the ELFv2 ABI we need to save all call-saved CR fields into
28542 *separate* slots if the routine calls __builtin_eh_return, so
28543 that they can be independently restored by the unwinder. */
28544 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28546 int i, cr_off = info->ehcr_offset;
28547 rtx crsave;
28549 /* ??? We might get better performance by using multiple mfocrf
28550 instructions. */
28551 crsave = gen_rtx_REG (SImode, 0);
28552 emit_insn (gen_movesi_from_cr (crsave));
28554 for (i = 0; i < 8; i++)
28555 if (!call_used_regs[CR0_REGNO + i])
28557 rtvec p = rtvec_alloc (2);
28558 RTVEC_ELT (p, 0)
28559 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
28560 RTVEC_ELT (p, 1)
28561 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28563 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28565 RTX_FRAME_RELATED_P (insn) = 1;
28566 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28567 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
28568 sp_reg_rtx, cr_off + sp_off));
28570 cr_off += reg_size;
28574 /* Update stack and set back pointer unless this is V.4,
28575 for which it was done previously. */
28576 if (!WORLD_SAVE_P (info) && info->push_p
28577 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
28579 rtx ptr_reg = NULL;
28580 int ptr_off = 0;
28582 /* If saving AltiVec regs we need to be able to address all save
28583 locations using a 16-bit offset. */
28584 if ((strategy & SAVE_INLINE_VRS) == 0
28585 || (info->altivec_size != 0
28586 && (info->altivec_save_offset + info->altivec_size - 16
28587 + info->total_size - frame_off) > 32767)
28588 || (info->vrsave_size != 0
28589 && (info->vrsave_save_offset
28590 + info->total_size - frame_off) > 32767))
28592 int sel = SAVRES_SAVE | SAVRES_VR;
28593 unsigned ptr_regno = ptr_regno_for_savres (sel);
28595 if (using_static_chain_p
28596 && ptr_regno == STATIC_CHAIN_REGNUM)
28597 ptr_regno = 12;
28598 if (REGNO (frame_reg_rtx) != ptr_regno)
28599 START_USE (ptr_regno);
28600 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28601 frame_reg_rtx = ptr_reg;
28602 ptr_off = info->altivec_save_offset + info->altivec_size;
28603 frame_off = -ptr_off;
28605 else if (REGNO (frame_reg_rtx) == 1)
28606 frame_off = info->total_size;
28607 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28608 ptr_reg, ptr_off);
28609 if (REGNO (frame_reg_rtx) == 12)
28610 sp_adjust = 0;
28611 sp_off = info->total_size;
28612 if (frame_reg_rtx != sp_reg_rtx)
28613 rs6000_emit_stack_tie (frame_reg_rtx, false);
28616 /* Set frame pointer, if needed. */
28617 if (frame_pointer_needed)
28619 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
28620 sp_reg_rtx);
28621 RTX_FRAME_RELATED_P (insn) = 1;
28624 /* Save AltiVec registers if needed. Save here because the red zone does
28625 not always include AltiVec registers. */
28626 if (!WORLD_SAVE_P (info)
28627 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
28629 int end_save = info->altivec_save_offset + info->altivec_size;
28630 int ptr_off;
28631 /* Oddly, the vector save/restore functions point r0 at the end
28632 of the save area, then use r11 or r12 to load offsets for
28633 [reg+reg] addressing. */
28634 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28635 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
28636 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28638 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28639 NOT_INUSE (0);
28640 if (scratch_regno == 12)
28641 sp_adjust = 0;
28642 if (end_save + frame_off != 0)
28644 rtx offset = GEN_INT (end_save + frame_off);
28646 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28648 else
28649 emit_move_insn (ptr_reg, frame_reg_rtx);
28651 ptr_off = -end_save;
28652 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28653 info->altivec_save_offset + ptr_off,
28654 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
28655 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
28656 NULL_RTX, NULL_RTX);
28657 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28659 /* The oddity mentioned above clobbered our frame reg. */
28660 emit_move_insn (frame_reg_rtx, ptr_reg);
28661 frame_off = ptr_off;
28664 else if (!WORLD_SAVE_P (info)
28665 && info->altivec_size != 0)
28667 int i;
28669 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28670 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28672 rtx areg, savereg, mem;
28673 HOST_WIDE_INT offset;
28675 offset = (info->altivec_save_offset + frame_off
28676 + 16 * (i - info->first_altivec_reg_save));
28678 savereg = gen_rtx_REG (V4SImode, i);
28680 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28682 mem = gen_frame_mem (V4SImode,
28683 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28684 GEN_INT (offset)));
28685 insn = emit_insn (gen_rtx_SET (mem, savereg));
28686 areg = NULL_RTX;
28688 else
28690 NOT_INUSE (0);
28691 areg = gen_rtx_REG (Pmode, 0);
28692 emit_move_insn (areg, GEN_INT (offset));
28694 /* AltiVec addressing mode is [reg+reg]. */
28695 mem = gen_frame_mem (V4SImode,
28696 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
28698 /* Rather than emitting a generic move, force use of the stvx
28699 instruction, which we always want on ISA 2.07 (power8) systems.
28700 In particular we don't want xxpermdi/stxvd2x for little
28701 endian. */
28702 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
28705 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28706 areg, GEN_INT (offset));
28710 /* VRSAVE is a bit vector representing which AltiVec registers
28711 are used. The OS uses this to determine which vector
28712 registers to save on a context switch. We need to save
28713 VRSAVE on the stack frame, add whatever AltiVec registers we
28714 used in this function, and do the corresponding magic in the
28715 epilogue. */
28717 if (!WORLD_SAVE_P (info)
28718 && info->vrsave_size != 0)
28720 rtx reg, vrsave;
28721 int offset;
28722 int save_regno;
28724 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
28725 be using r12 as frame_reg_rtx and r11 as the static chain
28726 pointer for nested functions. */
28727 save_regno = 12;
28728 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28729 && !using_static_chain_p)
28730 save_regno = 11;
28731 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
28733 save_regno = 11;
28734 if (using_static_chain_p)
28735 save_regno = 0;
28738 NOT_INUSE (save_regno);
28739 reg = gen_rtx_REG (SImode, save_regno);
28740 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28741 if (TARGET_MACHO)
28742 emit_insn (gen_get_vrsave_internal (reg));
28743 else
28744 emit_insn (gen_rtx_SET (reg, vrsave));
28746 /* Save VRSAVE. */
28747 offset = info->vrsave_save_offset + frame_off;
28748 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
28750 /* Include the registers in the mask. */
28751 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
28753 insn = emit_insn (generate_set_vrsave (reg, info, 0));
28756 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
28757 if (!TARGET_SINGLE_PIC_BASE
28758 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
28759 || (DEFAULT_ABI == ABI_V4
28760 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
28761 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
28763 /* If emit_load_toc_table will use the link register, we need to save
28764 it. We use R12 for this purpose because emit_load_toc_table
28765 can use register 0. This allows us to use a plain 'blr' to return
28766 from the procedure more often. */
28767 int save_LR_around_toc_setup = (TARGET_ELF
28768 && DEFAULT_ABI == ABI_V4
28769 && flag_pic
28770 && ! info->lr_save_p
28771 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
28772 if (save_LR_around_toc_setup)
28774 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28775 rtx tmp = gen_rtx_REG (Pmode, 12);
28777 sp_adjust = 0;
28778 insn = emit_move_insn (tmp, lr);
28779 RTX_FRAME_RELATED_P (insn) = 1;
28781 rs6000_emit_load_toc_table (TRUE);
28783 insn = emit_move_insn (lr, tmp);
28784 add_reg_note (insn, REG_CFA_RESTORE, lr);
28785 RTX_FRAME_RELATED_P (insn) = 1;
28787 else
28788 rs6000_emit_load_toc_table (TRUE);
28791 #if TARGET_MACHO
28792 if (!TARGET_SINGLE_PIC_BASE
28793 && DEFAULT_ABI == ABI_DARWIN
28794 && flag_pic && crtl->uses_pic_offset_table)
28796 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28797 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
28799 /* Save and restore LR locally around this call (in R0). */
28800 if (!info->lr_save_p)
28801 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
28803 emit_insn (gen_load_macho_picbase (src));
28805 emit_move_insn (gen_rtx_REG (Pmode,
28806 RS6000_PIC_OFFSET_TABLE_REGNUM),
28807 lr);
28809 if (!info->lr_save_p)
28810 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
28812 #endif
28814 /* If we need to, save the TOC register after doing the stack setup.
28815 Do not emit eh frame info for this save. The unwinder wants info,
28816 conceptually attached to instructions in this function, about
28817 register values in the caller of this function. This R2 may have
28818 already been changed from the value in the caller.
28819 We don't attempt to write accurate DWARF EH frame info for R2
28820 because code emitted by gcc for a (non-pointer) function call
28821 doesn't save and restore R2. Instead, R2 is managed out-of-line
28822 by a linker generated plt call stub when the function resides in
28823 a shared library. This behavior is costly to describe in DWARF,
28824 both in terms of the size of DWARF info and the time taken in the
28825 unwinder to interpret it. R2 changes, apart from the
28826 calls_eh_return case earlier in this function, are handled by
28827 linux-unwind.h frob_update_context. */
28828 if (rs6000_save_toc_in_prologue_p ())
28830 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
28831 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
28834 if (using_split_stack && split_stack_arg_pointer_used_p ())
28836 /* Set up the arg pointer (r12) for -fsplit-stack code. If
28837 __morestack was called, it left the arg pointer to the old
28838 stack in r29. Otherwise, the arg pointer is the top of the
28839 current frame. */
28840 cfun->machine->split_stack_argp_used = true;
28841 if (sp_adjust)
28843 rtx r12 = gen_rtx_REG (Pmode, 12);
28844 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
28845 emit_insn_before (set_r12, sp_adjust);
28847 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
28849 rtx r12 = gen_rtx_REG (Pmode, 12);
28850 if (frame_off == 0)
28851 emit_move_insn (r12, frame_reg_rtx);
28852 else
28853 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
28855 if (info->push_p)
28857 rtx r12 = gen_rtx_REG (Pmode, 12);
28858 rtx r29 = gen_rtx_REG (Pmode, 29);
28859 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28860 rtx not_more = gen_label_rtx ();
28861 rtx jump;
28863 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28864 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
28865 gen_rtx_LABEL_REF (VOIDmode, not_more),
28866 pc_rtx);
28867 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28868 JUMP_LABEL (jump) = not_more;
28869 LABEL_NUSES (not_more) += 1;
28870 emit_move_insn (r12, r29);
28871 emit_label (not_more);
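/* cr7 is assumed to still hold the result of the split-stack limit
   comparison from earlier in the prologue, so the branch above skips
   the r29 copy exactly on the path where __morestack was not
   called.  */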
28876 /* Output .extern statements for the save/restore routines we use. */
28878 static void
28879 rs6000_output_savres_externs (FILE *file)
28881 rs6000_stack_t *info = rs6000_stack_info ();
28883 if (TARGET_DEBUG_STACK)
28884 debug_stack_info (info);
28886 /* Write .extern for any function we will call to save and restore
28887 fp values. */
28888 if (info->first_fp_reg_save < 64
28889 && !TARGET_MACHO
28890 && !TARGET_ELF)
28892 char *name;
28893 int regno = info->first_fp_reg_save - 32;
28895 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
28897 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28898 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28899 name = rs6000_savres_routine_name (info, regno, sel);
28900 fprintf (file, "\t.extern %s\n", name);
28902 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
28904 bool lr = (info->savres_strategy
28905 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28906 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28907 name = rs6000_savres_routine_name (info, regno, sel);
28908 fprintf (file, "\t.extern %s\n", name);
28913 /* Write function prologue. */
28915 static void
28916 rs6000_output_function_prologue (FILE *file,
28917 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28919 if (!cfun->is_thunk)
28920 rs6000_output_savres_externs (file);
28922 /* ELFv2 ABI r2 setup code and local entry point. This must follow
28923 immediately after the global entry point label. */
28924 if (rs6000_global_entry_point_needed_p ())
28926 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28928 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
28930 if (TARGET_CMODEL != CMODEL_LARGE)
28932 /* In the small and medium code models, we assume the TOC is less
28933 than 2 GB away from the text section, so it can be computed via the
28934 following two-instruction sequence. */
28935 char buf[256];
28937 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28938 fprintf (file, "0:\taddis 2,12,.TOC.-");
28939 assemble_name (file, buf);
28940 fprintf (file, "@ha\n");
28941 fprintf (file, "\taddi 2,2,.TOC.-");
28942 assemble_name (file, buf);
28943 fprintf (file, "@l\n");
28945 else
28947 /* In the large code model, we allow arbitrary offsets between the
28948 TOC and the text section, so we have to load the offset from
28949 memory. The data field is emitted directly before the global
28950 entry point in rs6000_elf_declare_function_name. */
28951 char buf[256];
28953 #ifdef HAVE_AS_ENTRY_MARKERS
28954 /* If supported by the linker, emit a marker relocation. If the
28955 total code size of the final executable or shared library
28956 happens to fit into 2 GB after all, the linker will replace
28957 this code sequence with the sequence for the small or medium
28958 code model. */
28959 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
28960 #endif
28961 fprintf (file, "\tld 2,");
28962 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28963 assemble_name (file, buf);
28964 fprintf (file, "-");
28965 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28966 assemble_name (file, buf);
28967 fprintf (file, "(12)\n");
28968 fprintf (file, "\tadd 2,2,12\n");
28971 fputs ("\t.localentry\t", file);
28972 assemble_name (file, name);
28973 fputs (",.-", file);
28974 assemble_name (file, name);
28975 fputs ("\n", file);
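/* For the medium code model the output above amounts to, for a
   function FN (with .LCF0 as the internal label):
     .LCF0:
     0:      addis 2,12,.TOC.-.LCF0@ha
             addi 2,2,.TOC.-.LCF0@l
             .localentry FN,.-FN
   i.e. r2 is computed from the global entry address in r12, and the
   local entry point for TOC-preserving calls follows immediately.  */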
28978 /* Output -mprofile-kernel code. This needs to be done here instead of
28979 in output_function_profiler since it must go after the ELFv2 ABI
28980 local entry point. */
28981 if (TARGET_PROFILE_KERNEL && crtl->profile)
28983 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28984 gcc_assert (!TARGET_32BIT);
28986 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28988 /* In the ELFv2 ABI we have no compiler stack word. It must be
28989 the responsibility of _mcount to preserve the static chain
28990 register if required. */
28991 if (DEFAULT_ABI != ABI_ELFv2
28992 && cfun->static_chain_decl != NULL)
28994 asm_fprintf (file, "\tstd %s,24(%s)\n",
28995 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28996 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28997 asm_fprintf (file, "\tld %s,24(%s)\n",
28998 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29000 else
29001 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
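/* So on a 64-bit ABI_AIX (ELFv1) target with a live static chain the
   sequence above is, illustratively (assuming RS6000_MCOUNT expands
   to "_mcount"):

	mflr 0
	std 11,24(1)
	bl _mcount
	ld 11,24(1)  */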
29004 rs6000_pic_labelno++;
29007 /* -mprofile-kernel code calls mcount before the function prologue,
29008 so a profiled leaf function should stay a leaf function. */
29009 static bool
29010 rs6000_keep_leaf_when_profiled ()
29012 return TARGET_PROFILE_KERNEL;
29015 /* Non-zero if vmx regs are restored before the frame pop, zero if
29016 we restore after the pop when possible. */
29017 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
29019 /* Restoring cr is a two step process: loading a reg from the frame
29020 save, then moving the reg to cr. For ABI_V4 we must let the
29021 unwinder know that the stack location is no longer valid at or
29022 before the stack deallocation, but we can't emit a cfa_restore for
29023 cr at the stack deallocation like we do for other registers.
29024 The trouble is that it is possible for the move to cr to be
29025 scheduled after the stack deallocation. So say exactly where cr
29026 is located on each of the two insns. */
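/* A minimal sketch of the two insns in question (register and offset
   chosen for illustration only):

	lwz 12,8(1)	load_cr_save: REG_CFA_REGISTER note says cr2
			now lives in r12
	addi 1,1,SIZE	stack deallocation; the save slot is now dead
	mtcrf 32,12	restore_saved_cr: REG_CFA_RESTORE says cr2 is
			back in place

   Even if the scheduler moves the mtcrf past the deallocation, the
   unwinder can always find cr2 via r12.  */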
29028 static rtx
29029 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
29031 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
29032 rtx reg = gen_rtx_REG (SImode, regno);
29033 rtx_insn *insn = emit_move_insn (reg, mem);
29035 if (!exit_func && DEFAULT_ABI == ABI_V4)
29037 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29038 rtx set = gen_rtx_SET (reg, cr);
29040 add_reg_note (insn, REG_CFA_REGISTER, set);
29041 RTX_FRAME_RELATED_P (insn) = 1;
29043 return reg;
29046 /* Reload CR from REG. */
29048 static void
29049 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
29051 int count = 0;
29052 int i;
29054 if (using_mfcr_multiple)
29056 for (i = 0; i < 8; i++)
29057 if (save_reg_p (CR0_REGNO + i))
29058 count++;
29059 gcc_assert (count);
29062 if (using_mfcr_multiple && count > 1)
29064 rtx_insn *insn;
29065 rtvec p;
29066 int ndx;
29068 p = rtvec_alloc (count);
29070 ndx = 0;
29071 for (i = 0; i < 8; i++)
29072 if (save_reg_p (CR0_REGNO + i))
29074 rtvec r = rtvec_alloc (2);
29075 RTVEC_ELT (r, 0) = reg;
29076 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
29077 RTVEC_ELT (p, ndx) =
29078 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
29079 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
29080 ndx++;
29082 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29083 gcc_assert (ndx == count);
29085 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29086 CR field separately. */
29087 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29089 for (i = 0; i < 8; i++)
29090 if (save_reg_p (CR0_REGNO + i))
29091 add_reg_note (insn, REG_CFA_RESTORE,
29092 gen_rtx_REG (SImode, CR0_REGNO + i));
29094 RTX_FRAME_RELATED_P (insn) = 1;
29097 else
29098 for (i = 0; i < 8; i++)
29099 if (save_reg_p (CR0_REGNO + i))
29101 rtx insn = emit_insn (gen_movsi_to_cr_one
29102 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29104 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29105 CR field separately, attached to the insn that in fact
29106 restores this particular CR field. */
29107 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29109 add_reg_note (insn, REG_CFA_RESTORE,
29110 gen_rtx_REG (SImode, CR0_REGNO + i));
29112 RTX_FRAME_RELATED_P (insn) = 1;
29116 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
29117 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
29118 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29120 rtx_insn *insn = get_last_insn ();
29121 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29123 add_reg_note (insn, REG_CFA_RESTORE, cr);
29124 RTX_FRAME_RELATED_P (insn) = 1;
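/* Example: if cr2, cr3 and cr4 were saved (the usual call-saved
   fields) and REG holds the saved word, the multiple-field path above
   emits the single instruction

	mtcrf 56,REG

   since the field masks are (1<<5) | (1<<4) | (1<<3) == 56.  */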
29128 /* Like cr, the move to lr instruction can be scheduled after the
29129 stack deallocation, but unlike cr, its stack frame save is still
29130 valid. So we only need to emit the cfa_restore on the correct
29131 instruction. */
29133 static void
29134 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
29136 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
29137 rtx reg = gen_rtx_REG (Pmode, regno);
29139 emit_move_insn (reg, mem);
29142 static void
29143 restore_saved_lr (int regno, bool exit_func)
29145 rtx reg = gen_rtx_REG (Pmode, regno);
29146 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29147 rtx_insn *insn = emit_move_insn (lr, reg);
29149 if (!exit_func && flag_shrink_wrap)
29151 add_reg_note (insn, REG_CFA_RESTORE, lr);
29152 RTX_FRAME_RELATED_P (insn) = 1;
29156 static rtx
29157 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
29159 if (DEFAULT_ABI == ABI_ELFv2)
29161 int i;
29162 for (i = 0; i < 8; i++)
29163 if (save_reg_p (CR0_REGNO + i))
29165 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
29166 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
29167 cfa_restores);
29170 else if (info->cr_save_p)
29171 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29172 gen_rtx_REG (SImode, CR2_REGNO),
29173 cfa_restores);
29175 if (info->lr_save_p)
29176 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29177 gen_rtx_REG (Pmode, LR_REGNO),
29178 cfa_restores);
29179 return cfa_restores;
29182 /* Return true if OFFSET from stack pointer can be clobbered by signals.
29183 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
29184 below the stack pointer that are not clobbered by signals. */
29186 static inline bool
29187 offset_below_red_zone_p (HOST_WIDE_INT offset)
29189 return offset < (DEFAULT_ABI == ABI_V4
29190 ? 0
29191 : TARGET_32BIT ? -220 : -288);
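/* For example, on a 64-bit AIX or ELFv2 target (288-byte red zone) a
   save slot at offset -300 from the incoming stack pointer may be
   clobbered by a signal handler once the frame is popped, so it must
   be reloaded beforehand; a slot at -200 is safe either way.  */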
29194 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
29196 static void
29197 emit_cfa_restores (rtx cfa_restores)
29199 rtx_insn *insn = get_last_insn ();
29200 rtx *loc = &REG_NOTES (insn);
29202 while (*loc)
29203 loc = &XEXP (*loc, 1);
29204 *loc = cfa_restores;
29205 RTX_FRAME_RELATED_P (insn) = 1;
29208 /* Emit function epilogue as insns. */
29210 void
29211 rs6000_emit_epilogue (int sibcall)
29213 rs6000_stack_t *info;
29214 int restoring_GPRs_inline;
29215 int restoring_FPRs_inline;
29216 int using_load_multiple;
29217 int using_mtcr_multiple;
29218 int use_backchain_to_restore_sp;
29219 int restore_lr;
29220 int strategy;
29221 HOST_WIDE_INT frame_off = 0;
29222 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
29223 rtx frame_reg_rtx = sp_reg_rtx;
29224 rtx cfa_restores = NULL_RTX;
29225 rtx insn;
29226 rtx cr_save_reg = NULL_RTX;
29227 machine_mode reg_mode = Pmode;
29228 int reg_size = TARGET_32BIT ? 4 : 8;
29229 int i;
29230 bool exit_func;
29231 unsigned ptr_regno;
29233 info = rs6000_stack_info ();
29235 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29237 reg_mode = V2SImode;
29238 reg_size = 8;
29241 strategy = info->savres_strategy;
29242 using_load_multiple = strategy & REST_MULTIPLE;
29243 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
29244 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
29245 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
29246 || rs6000_cpu == PROCESSOR_PPC603
29247 || rs6000_cpu == PROCESSOR_PPC750
29248 || optimize_size);
29249 /* Restore via the backchain when we have a large frame, since this
29250 is more efficient than an addis, addi pair. The second condition
29251 here will not trigger at the moment; we don't actually need a
29252 frame pointer for alloca, but the generic parts of the compiler
29253 give us one anyway. */
29254 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
29255 ? info->lr_save_offset
29256 : 0) > 32767
29257 || (cfun->calls_alloca
29258 && !frame_pointer_needed));
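/* E.g. for a 40000-byte frame the backchain restore is the single load

	ld 11,0(1)

   instead of the addis/addi pair

	addis 1,1,1
	addi 1,1,-25536

   (illustrative 64-bit code; 0x10000 - 25536 == 40000).  */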
29259 restore_lr = (info->lr_save_p
29260 && (restoring_FPRs_inline
29261 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
29262 && (restoring_GPRs_inline
29263 || info->first_fp_reg_save < 64)
29264 && !cfun->machine->lr_is_wrapped_separately);
29267 if (WORLD_SAVE_P (info))
29269 int i, j;
29270 char rname[30];
29271 const char *alloc_rname;
29272 rtvec p;
29274 /* eh_rest_world_r10 will return to the location saved in the LR
29275 stack slot (which is not likely to be our caller).
29276 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
29277 rest_world is similar, except any R10 parameter is ignored.
29278 The exception-handling stuff that was here in 2.95 is no
29279 longer necessary. */
29281 p = rtvec_alloc (9
29282 + 32 - info->first_gp_reg_save
29283 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
29284 + 63 + 1 - info->first_fp_reg_save);
29286 strcpy (rname, ((crtl->calls_eh_return) ?
29287 "*eh_rest_world_r10" : "*rest_world"));
29288 alloc_rname = ggc_strdup (rname);
29290 j = 0;
29291 RTVEC_ELT (p, j++) = ret_rtx;
29292 RTVEC_ELT (p, j++)
29293 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
29294 /* The instruction pattern requires a clobber here;
29295 it is shared with the restVEC helper. */
29296 RTVEC_ELT (p, j++)
29297 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
29300 /* CR register traditionally saved as CR2. */
29301 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
29302 RTVEC_ELT (p, j++)
29303 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
29304 if (flag_shrink_wrap)
29306 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29307 gen_rtx_REG (Pmode, LR_REGNO),
29308 cfa_restores);
29309 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29313 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29315 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29316 RTVEC_ELT (p, j++)
29317 = gen_frame_load (reg,
29318 frame_reg_rtx, info->gp_save_offset + reg_size * i);
29319 if (flag_shrink_wrap)
29320 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29322 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29324 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
29325 RTVEC_ELT (p, j++)
29326 = gen_frame_load (reg,
29327 frame_reg_rtx, info->altivec_save_offset + 16 * i);
29328 if (flag_shrink_wrap)
29329 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29331 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
29333 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29334 ? DFmode : SFmode),
29335 info->first_fp_reg_save + i);
29336 RTVEC_ELT (p, j++)
29337 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
29338 if (flag_shrink_wrap)
29339 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29341 RTVEC_ELT (p, j++)
29342 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
29343 RTVEC_ELT (p, j++)
29344 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
29345 RTVEC_ELT (p, j++)
29346 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
29347 RTVEC_ELT (p, j++)
29348 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
29349 RTVEC_ELT (p, j++)
29350 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
29351 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29353 if (flag_shrink_wrap)
29355 REG_NOTES (insn) = cfa_restores;
29356 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29357 RTX_FRAME_RELATED_P (insn) = 1;
29359 return;
29362 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
29363 if (info->push_p)
29364 frame_off = info->total_size;
29366 /* Restore AltiVec registers if we must do so before adjusting the
29367 stack. */
29368 if (info->altivec_size != 0
29369 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29370 || (DEFAULT_ABI != ABI_V4
29371 && offset_below_red_zone_p (info->altivec_save_offset))))
29373 int i;
29374 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29376 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29377 if (use_backchain_to_restore_sp)
29379 int frame_regno = 11;
29381 if ((strategy & REST_INLINE_VRS) == 0)
29383 /* Of r11 and r12, select the one not clobbered by an
29384 out-of-line restore function for the frame register. */
29385 frame_regno = 11 + 12 - scratch_regno;
29387 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
29388 emit_move_insn (frame_reg_rtx,
29389 gen_rtx_MEM (Pmode, sp_reg_rtx));
29390 frame_off = 0;
29392 else if (frame_pointer_needed)
29393 frame_reg_rtx = hard_frame_pointer_rtx;
29395 if ((strategy & REST_INLINE_VRS) == 0)
29397 int end_save = info->altivec_save_offset + info->altivec_size;
29398 int ptr_off;
29399 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29400 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29402 if (end_save + frame_off != 0)
29404 rtx offset = GEN_INT (end_save + frame_off);
29406 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29408 else
29409 emit_move_insn (ptr_reg, frame_reg_rtx);
29411 ptr_off = -end_save;
29412 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29413 info->altivec_save_offset + ptr_off,
29414 0, V4SImode, SAVRES_VR);
29416 else
29418 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29419 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29421 rtx addr, areg, mem, insn;
29422 rtx reg = gen_rtx_REG (V4SImode, i);
29423 HOST_WIDE_INT offset
29424 = (info->altivec_save_offset + frame_off
29425 + 16 * (i - info->first_altivec_reg_save));
29427 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29429 mem = gen_frame_mem (V4SImode,
29430 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29431 GEN_INT (offset)));
29432 insn = gen_rtx_SET (reg, mem);
29434 else
29436 areg = gen_rtx_REG (Pmode, 0);
29437 emit_move_insn (areg, GEN_INT (offset));
29439 /* AltiVec addressing mode is [reg+reg]. */
29440 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29441 mem = gen_frame_mem (V4SImode, addr);
29443 /* Rather than emitting a generic move, force use of the
29444 lvx instruction, which we always want. In particular we
29445 don't want lxvd2x/xxpermdi for little endian. */
29446 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29449 (void) emit_insn (insn);
29453 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29454 if (((strategy & REST_INLINE_VRS) == 0
29455 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29456 && (flag_shrink_wrap
29457 || (offset_below_red_zone_p
29458 (info->altivec_save_offset
29459 + 16 * (i - info->first_altivec_reg_save)))))
29461 rtx reg = gen_rtx_REG (V4SImode, i);
29462 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29466 /* Restore VRSAVE if we must do so before adjusting the stack. */
29467 if (info->vrsave_size != 0
29468 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29469 || (DEFAULT_ABI != ABI_V4
29470 && offset_below_red_zone_p (info->vrsave_save_offset))))
29472 rtx reg;
29474 if (frame_reg_rtx == sp_reg_rtx)
29476 if (use_backchain_to_restore_sp)
29478 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29479 emit_move_insn (frame_reg_rtx,
29480 gen_rtx_MEM (Pmode, sp_reg_rtx));
29481 frame_off = 0;
29483 else if (frame_pointer_needed)
29484 frame_reg_rtx = hard_frame_pointer_rtx;
29487 reg = gen_rtx_REG (SImode, 12);
29488 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29489 info->vrsave_save_offset + frame_off));
29491 emit_insn (generate_set_vrsave (reg, info, 1));
29494 insn = NULL_RTX;
29495 /* If we have a large stack frame, restore the old stack pointer
29496 using the backchain. */
29497 if (use_backchain_to_restore_sp)
29499 if (frame_reg_rtx == sp_reg_rtx)
29501 /* Under V.4, don't reset the stack pointer until after we're done
29502 loading the saved registers. */
29503 if (DEFAULT_ABI == ABI_V4)
29504 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29506 insn = emit_move_insn (frame_reg_rtx,
29507 gen_rtx_MEM (Pmode, sp_reg_rtx));
29508 frame_off = 0;
29510 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29511 && DEFAULT_ABI == ABI_V4)
29512 /* frame_reg_rtx has been set up by the altivec restore. */
29514 else
29516 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
29517 frame_reg_rtx = sp_reg_rtx;
29520 /* If we have a frame pointer, we can restore the old stack pointer
29521 from it. */
29522 else if (frame_pointer_needed)
29524 frame_reg_rtx = sp_reg_rtx;
29525 if (DEFAULT_ABI == ABI_V4)
29526 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29527 /* Prevent reordering memory accesses against stack pointer restore. */
29528 else if (cfun->calls_alloca
29529 || offset_below_red_zone_p (-info->total_size))
29530 rs6000_emit_stack_tie (frame_reg_rtx, true);
29532 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
29533 GEN_INT (info->total_size)));
29534 frame_off = 0;
29536 else if (info->push_p
29537 && DEFAULT_ABI != ABI_V4
29538 && !crtl->calls_eh_return)
29540 /* Prevent reordering memory accesses against stack pointer restore. */
29541 if (cfun->calls_alloca
29542 || offset_below_red_zone_p (-info->total_size))
29543 rs6000_emit_stack_tie (frame_reg_rtx, false);
29544 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
29545 GEN_INT (info->total_size)));
29546 frame_off = 0;
29548 if (insn && frame_reg_rtx == sp_reg_rtx)
29550 if (cfa_restores)
29552 REG_NOTES (insn) = cfa_restores;
29553 cfa_restores = NULL_RTX;
29555 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29556 RTX_FRAME_RELATED_P (insn) = 1;
29559 /* Restore AltiVec registers if we have not done so already. */
29560 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29561 && info->altivec_size != 0
29562 && (DEFAULT_ABI == ABI_V4
29563 || !offset_below_red_zone_p (info->altivec_save_offset)))
29565 int i;
29567 if ((strategy & REST_INLINE_VRS) == 0)
29569 int end_save = info->altivec_save_offset + info->altivec_size;
29570 int ptr_off;
29571 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29572 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29573 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29575 if (end_save + frame_off != 0)
29577 rtx offset = GEN_INT (end_save + frame_off);
29579 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29581 else
29582 emit_move_insn (ptr_reg, frame_reg_rtx);
29584 ptr_off = -end_save;
29585 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29586 info->altivec_save_offset + ptr_off,
29587 0, V4SImode, SAVRES_VR);
29588 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29590 /* Frame reg was clobbered by out-of-line save. Restore it
29591 from ptr_reg, and if we are calling out-of-line gpr or
29592 fpr restore set up the correct pointer and offset. */
29593 unsigned newptr_regno = 1;
29594 if (!restoring_GPRs_inline)
29596 bool lr = info->gp_save_offset + info->gp_size == 0;
29597 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29598 newptr_regno = ptr_regno_for_savres (sel);
29599 end_save = info->gp_save_offset + info->gp_size;
29601 else if (!restoring_FPRs_inline)
29603 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
29604 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29605 newptr_regno = ptr_regno_for_savres (sel);
29606 end_save = info->fp_save_offset + info->fp_size;
29609 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
29610 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
29612 if (end_save + ptr_off != 0)
29614 rtx offset = GEN_INT (end_save + ptr_off);
29616 frame_off = -end_save;
29617 if (TARGET_32BIT)
29618 emit_insn (gen_addsi3_carry (frame_reg_rtx,
29619 ptr_reg, offset));
29620 else
29621 emit_insn (gen_adddi3_carry (frame_reg_rtx,
29622 ptr_reg, offset));
29624 else
29626 frame_off = ptr_off;
29627 emit_move_insn (frame_reg_rtx, ptr_reg);
29631 else
29633 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29634 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29636 rtx addr, areg, mem, insn;
29637 rtx reg = gen_rtx_REG (V4SImode, i);
29638 HOST_WIDE_INT offset
29639 = (info->altivec_save_offset + frame_off
29640 + 16 * (i - info->first_altivec_reg_save));
29642 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29644 mem = gen_frame_mem (V4SImode,
29645 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29646 GEN_INT (offset)));
29647 insn = gen_rtx_SET (reg, mem);
29649 else
29651 areg = gen_rtx_REG (Pmode, 0);
29652 emit_move_insn (areg, GEN_INT (offset));
29654 /* AltiVec addressing mode is [reg+reg]. */
29655 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29656 mem = gen_frame_mem (V4SImode, addr);
29658 /* Rather than emitting a generic move, force use of the
29659 lvx instruction, which we always want. In particular we
29660 don't want lxvd2x/xxpermdi for little endian. */
29661 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29664 (void) emit_insn (insn);
29668 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29669 if (((strategy & REST_INLINE_VRS) == 0
29670 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29671 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29673 rtx reg = gen_rtx_REG (V4SImode, i);
29674 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29678 /* Restore VRSAVE if we have not done so already. */
29679 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29680 && info->vrsave_size != 0
29681 && (DEFAULT_ABI == ABI_V4
29682 || !offset_below_red_zone_p (info->vrsave_save_offset)))
29684 rtx reg;
29686 reg = gen_rtx_REG (SImode, 12);
29687 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29688 info->vrsave_save_offset + frame_off));
29690 emit_insn (generate_set_vrsave (reg, info, 1));
29693 /* If we exit by an out-of-line restore function on ABI_V4 then that
29694 function will deallocate the stack, so we don't need to worry
29695 about the unwinder restoring cr from an invalid stack frame
29696 location. */
29697 exit_func = (!restoring_FPRs_inline
29698 || (!restoring_GPRs_inline
29699 && info->first_fp_reg_save == 64));
29701 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
29702 *separate* slots if the routine calls __builtin_eh_return, so
29703 that they can be independently restored by the unwinder. */
29704 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29706 int i, cr_off = info->ehcr_offset;
29708 for (i = 0; i < 8; i++)
29709 if (!call_used_regs[CR0_REGNO + i])
29711 rtx reg = gen_rtx_REG (SImode, 0);
29712 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29713 cr_off + frame_off));
29715 insn = emit_insn (gen_movsi_to_cr_one
29716 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29718 if (!exit_func && flag_shrink_wrap)
29720 add_reg_note (insn, REG_CFA_RESTORE,
29721 gen_rtx_REG (SImode, CR0_REGNO + i));
29723 RTX_FRAME_RELATED_P (insn) = 1;
29726 cr_off += reg_size;
29730 /* Get the old lr if we saved it. If we are restoring registers
29731 out-of-line, then the out-of-line routines can do this for us. */
29732 if (restore_lr && restoring_GPRs_inline)
29733 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29735 /* Get the old cr if we saved it. */
29736 if (info->cr_save_p)
29738 unsigned cr_save_regno = 12;
29740 if (!restoring_GPRs_inline)
29742 /* Ensure we don't use the register used by the out-of-line
29743 gpr register restore below. */
29744 bool lr = info->gp_save_offset + info->gp_size == 0;
29745 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29746 int gpr_ptr_regno = ptr_regno_for_savres (sel);
29748 if (gpr_ptr_regno == 12)
29749 cr_save_regno = 11;
29750 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
29752 else if (REGNO (frame_reg_rtx) == 12)
29753 cr_save_regno = 11;
29755 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
29756 info->cr_save_offset + frame_off,
29757 exit_func);
29760 /* Set LR here to try to overlap restores below. */
29761 if (restore_lr && restoring_GPRs_inline)
29762 restore_saved_lr (0, exit_func);
29764 /* Load exception handler data registers, if needed. */
29765 if (crtl->calls_eh_return)
29767 unsigned int i, regno;
29769 if (TARGET_AIX)
29771 rtx reg = gen_rtx_REG (reg_mode, 2);
29772 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29773 frame_off + RS6000_TOC_SAVE_SLOT));
29776 for (i = 0; ; ++i)
29778 rtx mem;
29780 regno = EH_RETURN_DATA_REGNO (i);
29781 if (regno == INVALID_REGNUM)
29782 break;
29784 /* Note: possible use of r0 here to address SPE regs. */
29785 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
29786 info->ehrd_offset + frame_off
29787 + reg_size * (int) i);
29789 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
29793 /* Restore GPRs. This is done as a PARALLEL if we are using
29794 the load-multiple instructions. */
29795 if (TARGET_SPE_ABI
29796 && info->spe_64bit_regs_used
29797 && info->first_gp_reg_save != 32)
29799 /* Determine whether we can address all of the registers that need
29800 to be saved with an offset from frame_reg_rtx that fits in
29801 the small const field for SPE memory instructions. */
29802 int spe_regs_addressable
29803 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29804 + reg_size * (32 - info->first_gp_reg_save - 1))
29805 && restoring_GPRs_inline);
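/* The SPE load/store insns (evldd/evstdd) encode only a small unsigned
   offset in multiples of 8 bytes, which is what SPE_CONST_OFFSET_OK
   checks; hence the test above covers the farthest register slot we
   will have to address.  */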
29807 if (!spe_regs_addressable)
29809 int ool_adjust = 0;
29810 rtx old_frame_reg_rtx = frame_reg_rtx;
29811 /* Make r11 point to the start of the SPE save area. We worried about
29812 not clobbering it when we were saving registers in the prologue.
29813 There's no need to worry here because the static chain is passed
29814 anew to every function. */
29816 if (!restoring_GPRs_inline)
29817 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29818 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29819 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
29820 GEN_INT (info->spe_gp_save_offset
29821 + frame_off
29822 - ool_adjust)));
29823 /* Keep the invariant that frame_reg_rtx + frame_off points
29824 at the top of the stack frame. */
29825 frame_off = -info->spe_gp_save_offset + ool_adjust;
29828 if (restoring_GPRs_inline)
29830 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
29832 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29833 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29835 rtx offset, addr, mem, reg;
29837 /* We're doing all this to ensure that the immediate offset
29838 fits into the immediate field of 'evldd'. */
29839 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
29841 offset = GEN_INT (spe_offset + reg_size * i);
29842 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
29843 mem = gen_rtx_MEM (V2SImode, addr);
29844 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29846 emit_move_insn (reg, mem);
29849 else
29850 rs6000_emit_savres_rtx (info, frame_reg_rtx,
29851 info->spe_gp_save_offset + frame_off,
29852 info->lr_save_offset + frame_off,
29853 reg_mode,
29854 SAVRES_GPR | SAVRES_LR);
29856 else if (!restoring_GPRs_inline)
29858 /* We are jumping to an out-of-line function. */
29859 rtx ptr_reg;
29860 int end_save = info->gp_save_offset + info->gp_size;
29861 bool can_use_exit = end_save == 0;
29862 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
29863 int ptr_off;
29865 /* Emit stack reset code if we need it. */
29866 ptr_regno = ptr_regno_for_savres (sel);
29867 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29868 if (can_use_exit)
29869 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29870 else if (end_save + frame_off != 0)
29871 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
29872 GEN_INT (end_save + frame_off)));
29873 else if (REGNO (frame_reg_rtx) != ptr_regno)
29874 emit_move_insn (ptr_reg, frame_reg_rtx);
29875 if (REGNO (frame_reg_rtx) == ptr_regno)
29876 frame_off = -end_save;
29878 if (can_use_exit && info->cr_save_p)
29879 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
29881 ptr_off = -end_save;
29882 rs6000_emit_savres_rtx (info, ptr_reg,
29883 info->gp_save_offset + ptr_off,
29884 info->lr_save_offset + ptr_off,
29885 reg_mode, sel);
29887 else if (using_load_multiple)
29889 rtvec p;
29890 p = rtvec_alloc (32 - info->first_gp_reg_save);
29891 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29892 RTVEC_ELT (p, i)
29893 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29894 frame_reg_rtx,
29895 info->gp_save_offset + frame_off + reg_size * i);
29896 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
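/* On 32-bit targets this PARALLEL matches the load-multiple pattern,
   so e.g. with first_gp_reg_save == 29 it assembles to the single

	lmw 29,OFFSET(1)

   restoring r29, r30 and r31 at once (OFFSET is illustrative).  */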
29898 else
29900 int offset = info->gp_save_offset + frame_off;
29901 for (i = info->first_gp_reg_save; i < 32; i++)
29903 if (rs6000_reg_live_or_pic_offset_p (i)
29904 && !cfun->machine->gpr_is_wrapped_separately[i])
29906 rtx reg = gen_rtx_REG (reg_mode, i);
29907 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
29910 offset += reg_size;
29914 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29916 /* If the frame pointer was used then we can't delay emitting
29917 a REG_CFA_DEF_CFA note. This must happen on the insn that
29918 restores the frame pointer, r31. We may have already emitted
29919 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
29920 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
29921 be harmless if emitted. */
29922 if (frame_pointer_needed)
29924 insn = get_last_insn ();
29925 add_reg_note (insn, REG_CFA_DEF_CFA,
29926 plus_constant (Pmode, frame_reg_rtx, frame_off));
29927 RTX_FRAME_RELATED_P (insn) = 1;
29930 /* Set up cfa_restores. We always need these when
29931 shrink-wrapping. If not shrink-wrapping then we only need
29932 the cfa_restore when the stack location is no longer valid.
29933 The cfa_restores must be emitted on or before the insn that
29934 invalidates the stack, and of course must not be emitted
29935 before the insn that actually does the restore. The latter
29936 is why it is a bad idea to emit the cfa_restores as a group
29937 on the last instruction here that actually does a restore:
29938 That insn may be reordered with respect to others doing
29939 restores. */
29940 if (flag_shrink_wrap
29941 && !restoring_GPRs_inline
29942 && info->first_fp_reg_save == 64)
29943 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29945 for (i = info->first_gp_reg_save; i < 32; i++)
29946 if (!restoring_GPRs_inline
29947 || using_load_multiple
29948 || rs6000_reg_live_or_pic_offset_p (i))
29950 if (cfun->machine->gpr_is_wrapped_separately[i])
29951 continue;
29953 rtx reg = gen_rtx_REG (reg_mode, i);
29954 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29958 if (!restoring_GPRs_inline
29959 && info->first_fp_reg_save == 64)
29961 /* We are jumping to an out-of-line function. */
29962 if (cfa_restores)
29963 emit_cfa_restores (cfa_restores);
29964 return;
29967 if (restore_lr && !restoring_GPRs_inline)
29969 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29970 restore_saved_lr (0, exit_func);
29973 /* Restore FPRs if we need to do it without calling a function. */
29974 if (restoring_FPRs_inline)
29975 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29976 if (save_reg_p (info->first_fp_reg_save + i))
29978 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29979 ? DFmode : SFmode),
29980 info->first_fp_reg_save + i);
29981 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29982 info->fp_save_offset + frame_off + 8 * i));
29983 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29984 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29987 /* If we saved cr, restore it here. Just those that were used. */
29988 if (info->cr_save_p)
29989 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
29991 /* If this is V.4, unwind the stack pointer after all of the loads
29992 have been done, or set up r11 if we are restoring fp out of line. */
29993 ptr_regno = 1;
29994 if (!restoring_FPRs_inline)
29996 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29997 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29998 ptr_regno = ptr_regno_for_savres (sel);
30001 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
30002 if (REGNO (frame_reg_rtx) == ptr_regno)
30003 frame_off = 0;
30005 if (insn && restoring_FPRs_inline)
30007 if (cfa_restores)
30009 REG_NOTES (insn) = cfa_restores;
30010 cfa_restores = NULL_RTX;
30012 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30013 RTX_FRAME_RELATED_P (insn) = 1;
30016 if (crtl->calls_eh_return)
30018 rtx sa = EH_RETURN_STACKADJ_RTX;
30019 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
30022 if (!sibcall && restoring_FPRs_inline)
30024 if (cfa_restores)
30026 /* We can't hang the cfa_restores off a simple return,
30027 since the shrink-wrap code sometimes uses an existing
30028 return. This means there might be a path from
30029 pre-prologue code to this return, and dwarf2cfi code
30030 wants the eh_frame unwinder state to be the same on
30031 all paths to any point. So we need to emit the
30032 cfa_restores before the return. For -m64 we really
30033 don't need epilogue cfa_restores at all, except for
30034 this irritating dwarf2cfi-with-shrink-wrap
30035 requirement; the stack red-zone means eh_frame info
30036 from the prologue telling the unwinder to restore
30037 from the stack is perfectly good right to the end of
30038 the function. */
30039 emit_insn (gen_blockage ());
30040 emit_cfa_restores (cfa_restores);
30041 cfa_restores = NULL_RTX;
30044 emit_jump_insn (targetm.gen_simple_return ());
30047 if (!sibcall && !restoring_FPRs_inline)
30049 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30050 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
30051 int elt = 0;
30052 RTVEC_ELT (p, elt++) = ret_rtx;
30053 if (lr)
30054 RTVEC_ELT (p, elt++)
30055 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
30057 /* We have to restore more than two FP registers, so branch to the
30058 restore function. It will return to our caller. */
30059 int i;
30060 int reg;
30061 rtx sym;
30063 if (flag_shrink_wrap)
30064 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30066 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
30067 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
30068 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
30069 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
30071 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30073 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
30075 RTVEC_ELT (p, elt++)
30076 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
30077 if (flag_shrink_wrap)
30078 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30081 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30084 if (cfa_restores)
30086 if (sibcall)
30087 /* Ensure the cfa_restores are hung off an insn that won't
30088 be reordered above other restores. */
30089 emit_insn (gen_blockage ());
30091 emit_cfa_restores (cfa_restores);
30095 /* Write function epilogue. */
30097 static void
30098 rs6000_output_function_epilogue (FILE *file,
30099 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
30101 #if TARGET_MACHO
30102 macho_branch_islands ();
30103 /* Mach-O doesn't support labels at the end of objects, so if
30104 it looks like we might want one, insert a NOP. */
30106 rtx_insn *insn = get_last_insn ();
30107 rtx_insn *deleted_debug_label = NULL;
30108 while (insn
30109 && NOTE_P (insn)
30110 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
30112 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
30113 notes; instead set their CODE_LABEL_NUMBER to -1,
30114 otherwise there would be code generation differences
30115 between -g and -g0. */
30116 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30117 deleted_debug_label = insn;
30118 insn = PREV_INSN (insn);
30120 if (insn
30121 && (LABEL_P (insn)
30122 || (NOTE_P (insn)
30123 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
30124 fputs ("\tnop\n", file);
30125 else if (deleted_debug_label)
30126 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
30127 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30128 CODE_LABEL_NUMBER (insn) = -1;
30130 #endif
30132 /* Output a traceback table here. See /usr/include/sys/debug.h for info
30133 on its format.
30135 We don't output a traceback table if -finhibit-size-directive was
30136 used. The documentation for -finhibit-size-directive reads
30137 ``don't output a @code{.size} assembler directive, or anything
30138 else that would cause trouble if the function is split in the
30139 middle, and the two halves are placed at locations far apart in
30140 memory.'' The traceback table has this property, since it
30141 includes the offset from the start of the function to the
30142 traceback table itself.
30144 System V.4 PowerPC (and the embedded ABI derived from it) uses a
30145 different traceback table. */
30146 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30147 && ! flag_inhibit_size_directive
30148 && rs6000_traceback != traceback_none && !cfun->is_thunk)
30150 const char *fname = NULL;
30151 const char *language_string = lang_hooks.name;
30152 int fixed_parms = 0, float_parms = 0, parm_info = 0;
30153 int i;
30154 int optional_tbtab;
30155 rs6000_stack_t *info = rs6000_stack_info ();
30157 if (rs6000_traceback == traceback_full)
30158 optional_tbtab = 1;
30159 else if (rs6000_traceback == traceback_part)
30160 optional_tbtab = 0;
30161 else
30162 optional_tbtab = !optimize_size && !TARGET_ELF;
30164 if (optional_tbtab)
30166 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30167 while (*fname == '.') /* V.4 encodes . in the name */
30168 fname++;
30170 /* Need label immediately before tbtab, so we can compute
30171 its offset from the function start. */
30172 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30173 ASM_OUTPUT_LABEL (file, fname);
30176 /* The .tbtab pseudo-op can only be used for the first eight
30177 expressions, since it can't handle the possibly variable
30178 length fields that follow. However, if you omit the optional
30179 fields, the assembler outputs zeros for all optional fields
30180 anyway, giving each variable length field its minimum length
30181 (as defined in sys/debug.h). Thus we cannot use the .tbtab
30182 pseudo-op at all. */
30184 /* An all-zero word flags the start of the tbtab, for debuggers
30185 that have to find it by searching forward from the entry
30186 point or from the current pc. */
30187 fputs ("\t.long 0\n", file);
30189 /* Tbtab format type. Use format type 0. */
30190 fputs ("\t.byte 0,", file);
30192 /* Language type. Unfortunately, there does not seem to be any
30193 official way to discover the language being compiled, so we
30194 use language_string.
30195 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
30196 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
30197 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
30198 either, so for now use 0. */
30199 if (lang_GNU_C ()
30200 || ! strcmp (language_string, "GNU GIMPLE")
30201 || ! strcmp (language_string, "GNU Go")
30202 || ! strcmp (language_string, "libgccjit"))
30203 i = 0;
30204 else if (! strcmp (language_string, "GNU F77")
30205 || lang_GNU_Fortran ())
30206 i = 1;
30207 else if (! strcmp (language_string, "GNU Pascal"))
30208 i = 2;
30209 else if (! strcmp (language_string, "GNU Ada"))
30210 i = 3;
30211 else if (lang_GNU_CXX ()
30212 || ! strcmp (language_string, "GNU Objective-C++"))
30213 i = 9;
30214 else if (! strcmp (language_string, "GNU Java"))
30215 i = 13;
30216 else if (! strcmp (language_string, "GNU Objective-C"))
30217 i = 14;
30218 else
30219 gcc_unreachable ();
30220 fprintf (file, "%d,", i);
30222 /* 8 single bit fields: global linkage (not set for C extern linkage,
30223 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
30224 from start of procedure stored in tbtab, internal function, function
30225 has controlled storage, function has no toc, function uses fp,
30226 function logs/aborts fp operations. */
30227 /* Assume that fp operations are used if any fp reg must be saved. */
30228 fprintf (file, "%d,",
30229 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
30231 /* 6 bitfields: function is interrupt handler, name present in
30232 proc table, function calls alloca, on condition directives
30233 (controls stack walks, 3 bits), saves condition reg, saves
30234 link reg. */
30235 /* The `function calls alloca' bit seems to be set whenever reg 31 is
30236 set up as a frame pointer, even when there is no alloca call. */
30237 fprintf (file, "%d,",
30238 ((optional_tbtab << 6)
30239 | ((optional_tbtab & frame_pointer_needed) << 5)
30240 | (info->cr_save_p << 1)
30241 | (info->lr_save_p)));
30243 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
30244 (6 bits). */
30245 fprintf (file, "%d,",
30246 (info->push_p << 7) | (64 - info->first_fp_reg_save));
30248 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
30249 fprintf (file, "%d,", (32 - first_reg_to_save ()));
30251 if (optional_tbtab)
30253 /* Compute the parameter info from the function decl argument
30254 list. */
30255 tree decl;
30256 int next_parm_info_bit = 31;
30258 for (decl = DECL_ARGUMENTS (current_function_decl);
30259 decl; decl = DECL_CHAIN (decl))
30261 rtx parameter = DECL_INCOMING_RTL (decl);
30262 machine_mode mode = GET_MODE (parameter);
30264 if (GET_CODE (parameter) == REG)
30266 if (SCALAR_FLOAT_MODE_P (mode))
30268 int bits;
30270 float_parms++;
30272 switch (mode)
30274 case SFmode:
30275 case SDmode:
30276 bits = 0x2;
30277 break;
30279 case DFmode:
30280 case DDmode:
30281 case TFmode:
30282 case TDmode:
30283 case IFmode:
30284 case KFmode:
30285 bits = 0x3;
30286 break;
30288 default:
30289 gcc_unreachable ();
30292 /* If only one bit will fit, don't or in this entry. */
30293 if (next_parm_info_bit > 0)
30294 parm_info |= (bits << (next_parm_info_bit - 1));
30295 next_parm_info_bit -= 2;
30297 else
30299 fixed_parms += ((GET_MODE_SIZE (mode)
30300 + (UNITS_PER_WORD - 1))
30301 / UNITS_PER_WORD);
30302 next_parm_info_bit -= 1;
30308 /* Number of fixed point parameters. */
30309 /* This is actually the number of words of fixed point parameters; thus
30310 an 8 byte struct counts as 2; and thus the maximum value is 8. */
30311 fprintf (file, "%d,", fixed_parms);
30313 /* 2 bitfields: number of floating point parameters (7 bits), parameters
30314 all on stack. */
30315 /* This is actually the number of fp registers that hold parameters;
30316 and thus the maximum value is 13. */
30317 /* Set parameters on stack bit if parameters are not in their original
30318 registers, regardless of whether they are on the stack? Xlc
30319 seems to set the bit when not optimizing. */
30320 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
30322 if (! optional_tbtab)
30323 return;
30325 /* Optional fields follow. Some are variable length. */
30327 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
30328 11 double float. */
30329 /* There is an entry for each parameter in a register, in the order that
30330 they occur in the parameter list. Any intervening arguments on the
30331 stack are ignored. If the list overflows a long (max possible length
30332 34 bits) then completely leave off all elements that don't fit. */
30333 /* Only emit this long if there was at least one parameter. */
30334 if (fixed_parms || float_parms)
30335 fprintf (file, "\t.long %d\n", parm_info);
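/* Worked example (hypothetical signature): for
   void f (int a, double b, float c) the loop above produces
   fixed_parms == 1, float_parms == 2, and left-adjusted field bits
   0 (fixed), 11 (double), 10 (single), i.e.
   parm_info == 0x70000000.  */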
30337 /* Offset from start of code to tb table. */
30338 fputs ("\t.long ", file);
30339 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30340 RS6000_OUTPUT_BASENAME (file, fname);
30341 putc ('-', file);
30342 rs6000_output_function_entry (file, fname);
30343 putc ('\n', file);
30345 /* Interrupt handler mask. */
30346 /* Omit this long, since we never set the interrupt handler bit
30347 above. */
30349 /* Number of CTL (controlled storage) anchors. */
30350 /* Omit this long, since the has_ctl bit is never set above. */
30352 /* Displacement into stack of each CTL anchor. */
30353 /* Omit this list of longs, because there are no CTL anchors. */
30355 /* Length of function name. */
30356 if (*fname == '*')
30357 ++fname;
30358 fprintf (file, "\t.short %d\n", (int) strlen (fname));
30360 /* Function name. */
30361 assemble_string (fname, strlen (fname));
30363 /* Register for alloca automatic storage; this is always reg 31.
30364 Only emit this if the alloca bit was set above. */
30365 if (frame_pointer_needed)
30366 fputs ("\t.byte 31\n", file);
30368 fputs ("\t.align 2\n", file);
30371 /* Arrange to define .LCTOC1 label, if not already done. */
30372 if (need_toc_init)
30374 need_toc_init = 0;
30375 if (!toc_initialized)
30377 switch_to_section (toc_section);
30378 switch_to_section (current_function_section ());
30383 /* -fsplit-stack support. */
30385 /* A SYMBOL_REF for __morestack. */
30386 static GTY(()) rtx morestack_ref;
30388 static rtx
30389 gen_add3_const (rtx rt, rtx ra, long c)
30391 if (TARGET_64BIT)
30392 return gen_adddi3 (rt, ra, GEN_INT (c));
30393 else
30394 return gen_addsi3 (rt, ra, GEN_INT (c));
30397 /* Emit -fsplit-stack prologue, which goes before the regular function
30398 prologue (at local entry point in the case of ELFv2). */
30400 void
30401 rs6000_expand_split_stack_prologue (void)
30403 rs6000_stack_t *info = rs6000_stack_info ();
30404 unsigned HOST_WIDE_INT allocate;
30405 long alloc_hi, alloc_lo;
30406 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
30407 rtx_insn *insn;
30409 gcc_assert (flag_split_stack && reload_completed);
30411 if (!info->push_p)
30412 return;
30414 if (global_regs[29])
30416 error ("-fsplit-stack uses register r29");
30417 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
30418 "conflicts with %qD", global_regs_decl[29]);
30421 allocate = info->total_size;
30422 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
30424 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
30425 return;
30427 if (morestack_ref == NULL_RTX)
30429 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
30430 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
30431 | SYMBOL_FLAG_FUNCTION);
30434 r0 = gen_rtx_REG (Pmode, 0);
30435 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30436 r12 = gen_rtx_REG (Pmode, 12);
30437 emit_insn (gen_load_split_stack_limit (r0));
30438 /* Always emit two insns here to calculate the requested stack,
30439 so that the linker can edit them when adjusting size for calling
30440 non-split-stack code. */
30441 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
30442 alloc_lo = -allocate - alloc_hi;
30443 if (alloc_hi != 0)
30445 emit_insn (gen_add3_const (r12, r1, alloc_hi));
30446 if (alloc_lo != 0)
30447 emit_insn (gen_add3_const (r12, r12, alloc_lo));
30448 else
30449 emit_insn (gen_nop ());
30451 else
30453 emit_insn (gen_add3_const (r12, r1, alloc_lo));
30454 emit_insn (gen_nop ());
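/* Illustration of the two-insn invariant: a 70000-byte frame gives
   alloc_hi == -65536 and alloc_lo == -4464, so we emit

	addis 12,1,-1
	addi 12,12,-4464

   while a 432-byte frame gives alloc_hi == 0 and we pad with a nop:

	addi 12,1,-432
	nop  */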
30457 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30458 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
30459 ok_label = gen_label_rtx ();
30460 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30461 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
30462 gen_rtx_LABEL_REF (VOIDmode, ok_label),
30463 pc_rtx);
30464 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30465 JUMP_LABEL (jump) = ok_label;
30466 /* Mark the jump as very likely to be taken. */
30467 add_int_reg_note (jump, REG_BR_PROB,
30468 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
30470 lr = gen_rtx_REG (Pmode, LR_REGNO);
30471 insn = emit_move_insn (r0, lr);
30472 RTX_FRAME_RELATED_P (insn) = 1;
30473 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
30474 RTX_FRAME_RELATED_P (insn) = 1;
30476 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
30477 const0_rtx, const0_rtx));
30478 call_fusage = NULL_RTX;
30479 use_reg (&call_fusage, r12);
30480 /* Say the call uses r0, even though it doesn't, to stop regrename
30481 from twiddling with the insns saving lr, trashing args for cfun.
30482 The insns restoring lr are similarly protected by making
30483 split_stack_return use r0. */
30484 use_reg (&call_fusage, r0);
30485 add_function_usage_to (insn, call_fusage);
30486 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
30487 insn = emit_move_insn (lr, r0);
30488 add_reg_note (insn, REG_CFA_RESTORE, lr);
30489 RTX_FRAME_RELATED_P (insn) = 1;
30490 emit_insn (gen_split_stack_return ());
30492 emit_label (ok_label);
30493 LABEL_NUSES (ok_label) = 1;
30496 /* Return the internal arg pointer used for function incoming
30497 arguments. When -fsplit-stack, the arg pointer is r12 so we need
30498 to copy it to a pseudo in order for it to be preserved over calls
30499 and suchlike. We'd really like to use a pseudo here for the
30500 internal arg pointer but data-flow analysis is not prepared to
30501 accept pseudos as live at the beginning of a function. */
30503 static rtx
30504 rs6000_internal_arg_pointer (void)
30506 if (flag_split_stack
30507 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
30508 == NULL))
30511 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
30513 rtx pat;
30515 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
30516 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
30518 /* Put the pseudo initialization right after the note at the
30519 beginning of the function. */
30520 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
30521 gen_rtx_REG (Pmode, 12));
30522 push_topmost_sequence ();
30523 emit_insn_after (pat, get_insns ());
30524 pop_topmost_sequence ();
30526 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
30527 FIRST_PARM_OFFSET (current_function_decl));
30529 return virtual_incoming_args_rtx;
30532 /* We may have to tell the dataflow pass that the split stack prologue
30533 is initializing a register. */
30535 static void
30536 rs6000_live_on_entry (bitmap regs)
30538 if (flag_split_stack)
30539 bitmap_set_bit (regs, 12);
30542 /* Emit -fsplit-stack dynamic stack allocation space check. */
30544 void
30545 rs6000_split_stack_space_check (rtx size, rtx label)
30547 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30548 rtx limit = gen_reg_rtx (Pmode);
30549 rtx requested = gen_reg_rtx (Pmode);
30550 rtx cmp = gen_reg_rtx (CCUNSmode);
30551 rtx jump;
30553 emit_insn (gen_load_split_stack_limit (limit));
30554 if (CONST_INT_P (size))
30555 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
30556 else
30558 size = force_reg (Pmode, size);
30559 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
30561 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
30562 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30563 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
30564 gen_rtx_LABEL_REF (VOIDmode, label),
30565 pc_rtx);
30566 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30567 JUMP_LABEL (jump) = label;
30570 /* A C compound statement that outputs the assembler code for a thunk
30571 function, used to implement C++ virtual function calls with
30572 multiple inheritance. The thunk acts as a wrapper around a virtual
30573 function, adjusting the implicit object parameter before handing
30574 control off to the real function.
30576 First, emit code to add the integer DELTA to the location that
30577 contains the incoming first argument. Assume that this argument
30578 contains a pointer, and is the one used to pass the `this' pointer
30579 in C++. This is the incoming argument *before* the function
30580 prologue, e.g. `%o0' on a sparc. The addition must preserve the
30581 values of all other incoming arguments.
30583 After the addition, emit code to jump to FUNCTION, which is a
30584 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
30585 not touch the return address. Hence returning from FUNCTION will
30586 return to whoever called the current `thunk'.
30588 The effect must be as if FUNCTION had been called directly with the
30589 adjusted first argument. This macro is responsible for emitting
30590 all of the code for a thunk function; output_function_prologue()
30591 and output_function_epilogue() are not invoked.
30593 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
30594 been extracted from it.) It might possibly be useful on some
30595 targets, but probably not.
30597 If you do not define this macro, the target-independent code in the
30598 C++ frontend will generate a less efficient heavyweight thunk that
30599 calls FUNCTION instead of jumping to it. The generic approach does
30600 not support varargs. */
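/* For instance, a thunk with delta == -8 and no vcall offset boils
   down to (illustrative ELFv2 assembly, target function "f", this
   pointer in r3):

	addi 3,3,-8
	b f  */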
30602 static void
30603 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
30604 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
30605 tree function)
30607 rtx this_rtx, funexp;
30608 rtx_insn *insn;
30610 reload_completed = 1;
30611 epilogue_completed = 1;
30613 /* Mark the end of the (empty) prologue. */
30614 emit_note (NOTE_INSN_PROLOGUE_END);
30616 /* Find the "this" pointer. If the function returns a structure,
30617 the structure return pointer is in r3. */
30618 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
30619 this_rtx = gen_rtx_REG (Pmode, 4);
30620 else
30621 this_rtx = gen_rtx_REG (Pmode, 3);
30623 /* Apply the constant offset, if required. */
30624 if (delta)
30625 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
30627 /* Apply the offset from the vtable, if required. */
30628 if (vcall_offset)
30630 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
30631 rtx tmp = gen_rtx_REG (Pmode, 12);
30633 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
30634 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
30636 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
30637 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
30639 else
30641 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
30643 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
30645 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
30648 /* Generate a tail call to the target function. */
30649 if (!TREE_USED (function))
30651 assemble_external (function);
30652 TREE_USED (function) = 1;
30654 funexp = XEXP (DECL_RTL (function), 0);
30655 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
30657 #if TARGET_MACHO
30658 if (MACHOPIC_INDIRECT)
30659 funexp = machopic_indirect_call_target (funexp);
30660 #endif
30662 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
30663 generate sibcall RTL explicitly. */
30664 insn = emit_call_insn (
30665 gen_rtx_PARALLEL (VOIDmode,
30666 gen_rtvec (3,
30667 gen_rtx_CALL (VOIDmode,
30668 funexp, const0_rtx),
30669 gen_rtx_USE (VOIDmode, const0_rtx),
30670 simple_return_rtx)));
30671 SIBLING_CALL_P (insn) = 1;
30672 emit_barrier ();
30674 /* Run just enough of rest_of_compilation to get the insns emitted.
30675 There's not really enough bulk here to make other passes such as
30676 instruction scheduling worthwhile. Note that use_thunk calls
30677 assemble_start_function and assemble_end_function. */
30678 insn = get_insns ();
30679 shorten_branches (insn);
30680 final_start_function (insn, file, 1);
30681 final (insn, file, 1);
30682 final_end_function ();
30684 reload_completed = 0;
30685 epilogue_completed = 0;
30688 /* A quick summary of the various types of 'constant-pool tables'
30689 under PowerPC:
30691 Target       Flags                Name              One table per
30692 AIX          (none)               AIX TOC           object file
30693 AIX          -mfull-toc           AIX TOC           object file
30694 AIX          -mminimal-toc        AIX minimal TOC   translation unit
30695 SVR4/EABI    (none)               SVR4 SDATA        object file
30696 SVR4/EABI    -fpic                SVR4 pic          object file
30697 SVR4/EABI    -fPIC                SVR4 PIC          translation unit
30698 SVR4/EABI    -mrelocatable        EABI TOC          function
30699 SVR4/EABI    -maix                AIX TOC           object file
30700 SVR4/EABI    -maix -mminimal-toc
30701                                   AIX minimal TOC   translation unit
30703 Name              Reg.  Set by  entries        contains:
30704                                 made by addrs?  fp?      sum?
30706 AIX TOC            2    crt0    as      Y       option   option
30707 AIX minimal TOC   30    prolog  gcc     Y       Y        option
30708 SVR4 SDATA        13    crt0    gcc     N       Y        N
30709 SVR4 pic          30    prolog  ld      Y       not yet  N
30710 SVR4 PIC          30    prolog  gcc     Y       option   option
30711 EABI TOC          30    prolog  gcc     Y       option   option
30715 /* Hash functions for the hash table. */
30717 static unsigned
30718 rs6000_hash_constant (rtx k)
30720 enum rtx_code code = GET_CODE (k);
30721 machine_mode mode = GET_MODE (k);
30722 unsigned result = (code << 3) ^ mode;
30723 const char *format;
30724 int flen, fidx;
30726 format = GET_RTX_FORMAT (code);
30727 flen = strlen (format);
30728 fidx = 0;
30730 switch (code)
30732 case LABEL_REF:
30733 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
30735 case CONST_WIDE_INT:
30737 int i;
30738 flen = CONST_WIDE_INT_NUNITS (k);
30739 for (i = 0; i < flen; i++)
30740 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
30741 return result;
30744 case CONST_DOUBLE:
30745 if (mode != VOIDmode)
30746 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
30747 flen = 2;
30748 break;
30750 case CODE_LABEL:
30751 fidx = 3;
30752 break;
30754 default:
30755 break;
30758 for (; fidx < flen; fidx++)
30759 switch (format[fidx])
30761 case 's':
30763 unsigned i, len;
30764 const char *str = XSTR (k, fidx);
30765 len = strlen (str);
30766 result = result * 613 + len;
30767 for (i = 0; i < len; i++)
30768 result = result * 613 + (unsigned) str[i];
30769 break;
30771 case 'u':
30772 case 'e':
30773 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
30774 break;
30775 case 'i':
30776 case 'n':
30777 result = result * 613 + (unsigned) XINT (k, fidx);
30778 break;
30779 case 'w':
30780 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
30781 result = result * 613 + (unsigned) XWINT (k, fidx);
30782 else
30784 size_t i;
30785 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
30786 result = result * 613 + (unsigned) (XWINT (k, fidx)
30787 >> CHAR_BIT * i);
30789 break;
30790 case '0':
30791 break;
30792 default:
30793 gcc_unreachable ();
30796 return result;
30799 hashval_t
30800 toc_hasher::hash (toc_hash_struct *thc)
30802 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
30805 /* Compare H1 and H2 for equivalence. */
30807 bool
30808 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
30810 rtx r1 = h1->key;
30811 rtx r2 = h2->key;
30813 if (h1->key_mode != h2->key_mode)
30814 return 0;
30816 return rtx_equal_p (r1, r2);
30819 /* These are the names given by the C++ front end to vtables and
30820 vtable-like objects. Ideally, this logic should not be here;
30821 instead, there should be some programmatic way of inquiring as
30822 to whether or not an object is a vtable. */
30824 #define VTABLE_NAME_P(NAME) \
30825 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
30826 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
30827 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
30828 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
30829 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
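/* For example, under the Itanium C++ ABI a class Foo yields `_ZTV3Foo'
   for its vtable, `_ZTT3Foo' for its VTT, `_ZTI3Foo' for its typeinfo
   object, and `_ZTC'-prefixed names for construction vtables; `_vt.'
   is the prefix the old (pre-ABI) GNU mangling used for vtables.  */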
30831 #ifdef NO_DOLLAR_IN_LABEL
30832 /* Return a GGC-allocated character string translating dollar signs in
30833 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
30835 const char *
30836 rs6000_xcoff_strip_dollar (const char *name)
30838 char *strip, *p;
30839 const char *q;
30840 size_t len;
30842 q = (const char *) strchr (name, '$');
30844 if (q == 0 || q == name)
30845 return name;
30847 len = strlen (name);
30848 strip = XALLOCAVEC (char, len + 1);
30849 strcpy (strip, name);
30850 p = strip + (q - name);
30851 while (p)
30853 *p = '_';
30854 p = strchr (p + 1, '$');
30857 return ggc_alloc_string (strip, len);
30859 #endif
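/* Usage sketch (hypothetical input): given the XCOFF-style name
   "foo$bar$baz", rs6000_xcoff_strip_dollar returns "foo_bar_baz"; a
   name that starts with '$', or that contains no '$' at all, is
   returned unchanged.  */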
30861 void
30862 rs6000_output_symbol_ref (FILE *file, rtx x)
30864 const char *name = XSTR (x, 0);
30866 /* Currently C++ toc references to vtables can be emitted before it
30867 is decided whether the vtable is public or private. If this is
30868 the case, then the linker will eventually complain that there is
30869 a reference to an unknown section. Thus, for vtables only,
30870 we emit the TOC reference to reference the identifier and not the
30871 symbol. */
30872 if (VTABLE_NAME_P (name))
30874 RS6000_OUTPUT_BASENAME (file, name);
30876 else
30877 assemble_name (file, name);
30880 /* Output a TOC entry. We derive the entry name from what is being
30881 written. */
30883 void
30884 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
30886 char buf[256];
30887 const char *name = buf;
30888 rtx base = x;
30889 HOST_WIDE_INT offset = 0;
30891 gcc_assert (!TARGET_NO_TOC);
30893 /* When the linker won't eliminate them, don't output duplicate
30894 TOC entries (this happens on AIX if there is any kind of TOC,
30895 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
30896 CODE_LABELs. */
30897 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
30899 struct toc_hash_struct *h;
30901 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
30902 time because GGC is not initialized at that point. */
30903 if (toc_hash_table == NULL)
30904 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
30906 h = ggc_alloc<toc_hash_struct> ();
30907 h->key = x;
30908 h->key_mode = mode;
30909 h->labelno = labelno;
30911 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
30912 if (*found == NULL)
30913 *found = h;
30914 else /* This is indeed a duplicate.
30915 Set this label equal to that label. */
30917 fputs ("\t.set ", file);
30918 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30919 fprintf (file, "%d,", labelno);
30920 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30921 fprintf (file, "%d\n", ((*found)->labelno));
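/* With made-up label numbers, the directives just emitted read e.g.
   ".set LC..7,LC..3" on XCOFF (".set .LC7,.LC3" on ELF), aliasing
   this entry's label to that of the first occurrence.  */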
30923 #ifdef HAVE_AS_TLS
30924 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
30925 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
30926 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
30928 fputs ("\t.set ", file);
30929 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30930 fprintf (file, "%d,", labelno);
30931 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30932 fprintf (file, "%d\n", ((*found)->labelno));
30934 #endif
30935 return;
30939 /* If we're going to put a double constant in the TOC, make sure it's
30940 aligned properly when strict alignment is on. */
30941 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
30942 && STRICT_ALIGNMENT
30943 && GET_MODE_BITSIZE (mode) >= 64
30944 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
30945 ASM_OUTPUT_ALIGN (file, 3);
30948 (*targetm.asm_out.internal_label) (file, "LC", labelno);
30950 /* Handle FP constants specially. Note that if we have a minimal
30951 TOC, things we put here aren't actually in the TOC, so we can allow
30952 FP constants. */
30953 if (GET_CODE (x) == CONST_DOUBLE
30954 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
30955 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
30957 long k[4];
30959 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30960 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
30961 else
30962 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30964 if (TARGET_64BIT)
30966 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30967 fputs (DOUBLE_INT_ASM_OP, file);
30968 else
30969 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30970 k[0] & 0xffffffff, k[1] & 0xffffffff,
30971 k[2] & 0xffffffff, k[3] & 0xffffffff);
30972 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
30973 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30974 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
30975 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
30976 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
30977 return;
30979 else
30981 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30982 fputs ("\t.long ", file);
30983 else
30984 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30985 k[0] & 0xffffffff, k[1] & 0xffffffff,
30986 k[2] & 0xffffffff, k[3] & 0xffffffff);
30987 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
30988 k[0] & 0xffffffff, k[1] & 0xffffffff,
30989 k[2] & 0xffffffff, k[3] & 0xffffffff);
30990 return;
30993 else if (GET_CODE (x) == CONST_DOUBLE
30994 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
30996 long k[2];
30998 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30999 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
31000 else
31001 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31003 if (TARGET_64BIT)
31005 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31006 fputs (DOUBLE_INT_ASM_OP, file);
31007 else
31008 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31009 k[0] & 0xffffffff, k[1] & 0xffffffff);
31010 fprintf (file, "0x%lx%08lx\n",
31011 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31012 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
31013 return;
31015 else
31017 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31018 fputs ("\t.long ", file);
31019 else
31020 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31021 k[0] & 0xffffffff, k[1] & 0xffffffff);
31022 fprintf (file, "0x%lx,0x%lx\n",
31023 k[0] & 0xffffffff, k[1] & 0xffffffff);
31024 return;
31027 else if (GET_CODE (x) == CONST_DOUBLE
31028 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
31030 long l;
31032 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31033 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
31034 else
31035 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
31037 if (TARGET_64BIT)
31039 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31040 fputs (DOUBLE_INT_ASM_OP, file);
31041 else
31042 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31043 if (WORDS_BIG_ENDIAN)
31044 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
31045 else
31046 fprintf (file, "0x%lx\n", l & 0xffffffff);
31047 return;
31049 else
31051 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31052 fputs ("\t.long ", file);
31053 else
31054 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31055 fprintf (file, "0x%lx\n", l & 0xffffffff);
31056 return;
31059 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
31061 unsigned HOST_WIDE_INT low;
31062 HOST_WIDE_INT high;
31064 low = INTVAL (x) & 0xffffffff;
31065 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
31067 /* TOC entries are always Pmode-sized, so when big-endian
31068 smaller integer constants in the TOC need to be padded.
31069 (This is still a win over putting the constants in
31070 a separate constant pool, because then we'd have
31071 to have both a TOC entry _and_ the actual constant.)
31073 For a 32-bit target, CONST_INT values are loaded and shifted
31074 entirely within `low' and can be stored in one TOC entry. */
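/* For instance (illustrative only): on a 64-bit big-endian target an
   SImode CONST_INT 0x1234 is shifted into the high half of the
   Pmode-sized word below, so the emitted doubleword is
   0x123400000000.  */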
31076 /* It would be easy to make this work, but it doesn't now. */
31077 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
31079 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
31081 low |= high << 32;
31082 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
31083 high = (HOST_WIDE_INT) low >> 32;
31084 low &= 0xffffffff;
31087 if (TARGET_64BIT)
31089 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31090 fputs (DOUBLE_INT_ASM_OP, file);
31091 else
31092 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31093 (long) high & 0xffffffff, (long) low & 0xffffffff);
31094 fprintf (file, "0x%lx%08lx\n",
31095 (long) high & 0xffffffff, (long) low & 0xffffffff);
31096 return;
31098 else
31100 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
31102 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31103 fputs ("\t.long ", file);
31104 else
31105 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31106 (long) high & 0xffffffff, (long) low & 0xffffffff);
31107 fprintf (file, "0x%lx,0x%lx\n",
31108 (long) high & 0xffffffff, (long) low & 0xffffffff);
31110 else
31112 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31113 fputs ("\t.long ", file);
31114 else
31115 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
31116 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
31118 return;
31122 if (GET_CODE (x) == CONST)
31124 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
31125 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
31127 base = XEXP (XEXP (x, 0), 0);
31128 offset = INTVAL (XEXP (XEXP (x, 0), 1));
31131 switch (GET_CODE (base))
31133 case SYMBOL_REF:
31134 name = XSTR (base, 0);
31135 break;
31137 case LABEL_REF:
31138 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
31139 CODE_LABEL_NUMBER (XEXP (base, 0)));
31140 break;
31142 case CODE_LABEL:
31143 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
31144 break;
31146 default:
31147 gcc_unreachable ();
31150 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31151 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
31152 else
31154 fputs ("\t.tc ", file);
31155 RS6000_OUTPUT_BASENAME (file, name);
31157 if (offset < 0)
31158 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
31159 else if (offset)
31160 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
31162 /* Mark large TOC symbols on AIX with [TE] so they are mapped
31163 after other TOC symbols, reducing overflow of small TOC access
31164 to [TC] symbols. */
31165 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
31166 ? "[TE]," : "[TC],", file);
31169 /* Currently C++ toc references to vtables can be emitted before it
31170 is decided whether the vtable is public or private. If this is
31171 the case, then the linker will eventually complain that there is
31172 a TOC reference to an unknown section. Thus, for vtables only,
31173 we emit the TOC reference to reference the symbol and not the
31174 section. */
31175 if (VTABLE_NAME_P (name))
31177 RS6000_OUTPUT_BASENAME (file, name);
31178 if (offset < 0)
31179 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
31180 else if (offset > 0)
31181 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
31183 else
31184 output_addr_const (file, x);
31186 #if HAVE_AS_TLS
31187 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
31189 switch (SYMBOL_REF_TLS_MODEL (base))
31191 case 0:
31192 break;
31193 case TLS_MODEL_LOCAL_EXEC:
31194 fputs ("@le", file);
31195 break;
31196 case TLS_MODEL_INITIAL_EXEC:
31197 fputs ("@ie", file);
31198 break;
31199 /* Use global-dynamic for local-dynamic. */
31200 case TLS_MODEL_GLOBAL_DYNAMIC:
31201 case TLS_MODEL_LOCAL_DYNAMIC:
31202 putc ('\n', file);
31203 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
31204 fputs ("\t.tc .", file);
31205 RS6000_OUTPUT_BASENAME (file, name);
31206 fputs ("[TC],", file);
31207 output_addr_const (file, x);
31208 fputs ("@m", file);
31209 break;
31210 default:
31211 gcc_unreachable ();
31214 #endif
31216 putc ('\n', file);
31219 /* Output an assembler pseudo-op to write an ASCII string of N characters
31220 starting at P to FILE.
31222 On the RS/6000, we have to do this using the .byte operation and
31223 write out special characters outside the quoted string.
31224 Also, the assembler is broken; very long strings are truncated,
31225 so we must artificially break them up early. */
31227 void
31228 output_ascii (FILE *file, const char *p, int n)
31230 char c;
31231 int i, count_string;
31232 const char *for_string = "\t.byte \"";
31233 const char *for_decimal = "\t.byte ";
31234 const char *to_close = NULL;
31236 count_string = 0;
31237 for (i = 0; i < n; i++)
31239 c = *p++;
31240 if (c >= ' ' && c < 0177)
31242 if (for_string)
31243 fputs (for_string, file);
31244 putc (c, file);
31246 /* Write two quotes to get one. */
31247 if (c == '"')
31249 putc (c, file);
31250 ++count_string;
31253 for_string = NULL;
31254 for_decimal = "\"\n\t.byte ";
31255 to_close = "\"\n";
31256 ++count_string;
31258 if (count_string >= 512)
31260 fputs (to_close, file);
31262 for_string = "\t.byte \"";
31263 for_decimal = "\t.byte ";
31264 to_close = NULL;
31265 count_string = 0;
31268 else
31270 if (for_decimal)
31271 fputs (for_decimal, file);
31272 fprintf (file, "%d", c);
31274 for_string = "\n\t.byte \"";
31275 for_decimal = ", ";
31276 to_close = "\n";
31277 count_string = 0;
31281 /* Now close the string if we have written one. Then end the line. */
31282 if (to_close)
31283 fputs (to_close, file);
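/* A worked example (not in the original sources): for P = "Hi\n" and
   N = 3 the loop above emits

       .byte "Hi"
       .byte 10

   printable characters are accumulated inside one quoted string while
   the newline falls back to a decimal .byte, as described in the
   function comment.  */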
31286 /* Generate a unique section name for FILENAME for a section type
31287 represented by SECTION_DESC. Output goes into BUF.
31289 SECTION_DESC can be any string, as long as it is different for each
31290 possible section type.
31292 We name the section in the same manner as xlc. The name begins with an
31293 underscore followed by the filename (after stripping any leading directory
31294 names) with the last period replaced by the string SECTION_DESC. If
31295 FILENAME does not contain a period, SECTION_DESC is appended to the end of
31296 the name. */
31298 void
31299 rs6000_gen_section_name (char **buf, const char *filename,
31300 const char *section_desc)
31302 const char *q, *after_last_slash, *last_period = 0;
31303 char *p;
31304 int len;
31306 after_last_slash = filename;
31307 for (q = filename; *q; q++)
31309 if (*q == '/')
31310 after_last_slash = q + 1;
31311 else if (*q == '.')
31312 last_period = q;
31315 len = strlen (after_last_slash) + strlen (section_desc) + 2;
31316 *buf = (char *) xmalloc (len);
31318 p = *buf;
31319 *p++ = '_';
31321 for (q = after_last_slash; *q; q++)
31323 if (q == last_period)
31325 strcpy (p, section_desc);
31326 p += strlen (section_desc);
31327 break;
31330 else if (ISALNUM (*q))
31331 *p++ = *q;
31334 if (last_period == 0)
31335 strcpy (p, section_desc);
31336 else
31337 *p = '\0';
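/* Usage sketch (hypothetical arguments): for FILENAME "src/foo.c" and
   SECTION_DESC "_bss_", the loop above strips the directory prefix,
   substitutes SECTION_DESC at the last period, and produces
   "_foo_bss_"; non-alphanumeric characters before the period are
   simply dropped.  */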
31340 /* Emit profile function. */
31342 void
31343 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
31345 /* Non-standard profiling for kernels, which just saves LR then calls
31346 _mcount without worrying about arg saves. The idea is to change
31347 the function prologue as little as possible as it isn't easy to
31348 account for arg save/restore code added just for _mcount. */
31349 if (TARGET_PROFILE_KERNEL)
31350 return;
31352 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31354 #ifndef NO_PROFILE_COUNTERS
31355 # define NO_PROFILE_COUNTERS 0
31356 #endif
31357 if (NO_PROFILE_COUNTERS)
31358 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31359 LCT_NORMAL, VOIDmode, 0);
31360 else
31362 char buf[30];
31363 const char *label_name;
31364 rtx fun;
31366 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31367 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
31368 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
31370 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31371 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
31374 else if (DEFAULT_ABI == ABI_DARWIN)
31376 const char *mcount_name = RS6000_MCOUNT;
31377 int caller_addr_regno = LR_REGNO;
31379 /* Be conservative and always set this, at least for now. */
31380 crtl->uses_pic_offset_table = 1;
31382 #if TARGET_MACHO
31383 /* For PIC code, set up a stub and collect the caller's address
31384 from r0, which is where the prologue puts it. */
31385 if (MACHOPIC_INDIRECT
31386 && crtl->uses_pic_offset_table)
31387 caller_addr_regno = 0;
31388 #endif
31389 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
31390 LCT_NORMAL, VOIDmode, 1,
31391 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
31395 /* Write function profiler code. */
31397 void
31398 output_function_profiler (FILE *file, int labelno)
31400 char buf[100];
31402 switch (DEFAULT_ABI)
31404 default:
31405 gcc_unreachable ();
31407 case ABI_V4:
31408 if (!TARGET_32BIT)
31410 warning (0, "no profiling of 64-bit code for this ABI");
31411 return;
31413 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31414 fprintf (file, "\tmflr %s\n", reg_names[0]);
31415 if (NO_PROFILE_COUNTERS)
31417 asm_fprintf (file, "\tstw %s,4(%s)\n",
31418 reg_names[0], reg_names[1]);
31420 else if (TARGET_SECURE_PLT && flag_pic)
31422 if (TARGET_LINK_STACK)
31424 char name[32];
31425 get_ppc476_thunk_name (name);
31426 asm_fprintf (file, "\tbl %s\n", name);
31428 else
31429 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
31430 asm_fprintf (file, "\tstw %s,4(%s)\n",
31431 reg_names[0], reg_names[1]);
31432 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31433 asm_fprintf (file, "\taddis %s,%s,",
31434 reg_names[12], reg_names[12]);
31435 assemble_name (file, buf);
31436 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
31437 assemble_name (file, buf);
31438 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
31440 else if (flag_pic == 1)
31442 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
31443 asm_fprintf (file, "\tstw %s,4(%s)\n",
31444 reg_names[0], reg_names[1]);
31445 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31446 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
31447 assemble_name (file, buf);
31448 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
31450 else if (flag_pic > 1)
31452 asm_fprintf (file, "\tstw %s,4(%s)\n",
31453 reg_names[0], reg_names[1]);
31454 /* Now, we need to get the address of the label. */
31455 if (TARGET_LINK_STACK)
31457 char name[32];
31458 get_ppc476_thunk_name (name);
31459 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
31460 assemble_name (file, buf);
31461 fputs ("-.\n1:", file);
31462 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31463 asm_fprintf (file, "\taddi %s,%s,4\n",
31464 reg_names[11], reg_names[11]);
31466 else
31468 fputs ("\tbcl 20,31,1f\n\t.long ", file);
31469 assemble_name (file, buf);
31470 fputs ("-.\n1:", file);
31471 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31473 asm_fprintf (file, "\tlwz %s,0(%s)\n",
31474 reg_names[0], reg_names[11]);
31475 asm_fprintf (file, "\tadd %s,%s,%s\n",
31476 reg_names[0], reg_names[0], reg_names[11]);
31478 else
31480 asm_fprintf (file, "\tlis %s,", reg_names[12]);
31481 assemble_name (file, buf);
31482 fputs ("@ha\n", file);
31483 asm_fprintf (file, "\tstw %s,4(%s)\n",
31484 reg_names[0], reg_names[1]);
31485 asm_fprintf (file, "\tla %s,", reg_names[0]);
31486 assemble_name (file, buf);
31487 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
31490 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
31491 fprintf (file, "\tbl %s%s\n",
31492 RS6000_MCOUNT, flag_pic ? "@plt" : "");
31493 break;
31495 case ABI_AIX:
31496 case ABI_ELFv2:
31497 case ABI_DARWIN:
31498 /* Don't do anything, done in output_profile_hook (). */
31499 break;
31505 /* The following variable holds the last insn that was issued. */
31507 static rtx_insn *last_scheduled_insn;
31509 /* The following variable helps to balance the issuing of load and
31510 store instructions. */
31512 static int load_store_pendulum;
31514 /* The following variable helps pair divide insns during scheduling. */
31515 static int divide_cnt;
31516 /* The following variable helps pair and alternate vector and vector load
31517 insns during scheduling. */
31518 static int vec_load_pendulum;
31521 /* Power4 load update and store update instructions are cracked into a
31522 load or store and an integer insn which are executed in the same cycle.
31523 Branches have their own dispatch slot which does not count against the
31524 GCC issue rate, but it changes the program flow so there are no other
31525 instructions to issue in this cycle. */
31527 static int
31528 rs6000_variable_issue_1 (rtx_insn *insn, int more)
31530 last_scheduled_insn = insn;
31531 if (GET_CODE (PATTERN (insn)) == USE
31532 || GET_CODE (PATTERN (insn)) == CLOBBER)
31534 cached_can_issue_more = more;
31535 return cached_can_issue_more;
31538 if (insn_terminates_group_p (insn, current_group))
31540 cached_can_issue_more = 0;
31541 return cached_can_issue_more;
31544 /* If the insn has no reservation but we reach here anyway, issue it normally. */
31545 if (recog_memoized (insn) < 0)
31546 return more;
31548 if (rs6000_sched_groups)
31550 if (is_microcoded_insn (insn))
31551 cached_can_issue_more = 0;
31552 else if (is_cracked_insn (insn))
31553 cached_can_issue_more = more > 2 ? more - 2 : 0;
31554 else
31555 cached_can_issue_more = more - 1;
31557 return cached_can_issue_more;
31560 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
31561 return 0;
31563 cached_can_issue_more = more - 1;
31564 return cached_can_issue_more;
31567 static int
31568 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
31570 int r = rs6000_variable_issue_1 (insn, more);
31571 if (verbose)
31572 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
31573 return r;
31576 /* Adjust the cost of a scheduling dependency. Return the new cost of
31577 a dependency of kind DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
31579 static int
31580 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
31581 unsigned int)
31583 enum attr_type attr_type;
31585 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
31586 return cost;
31588 switch (dep_type)
31590 case REG_DEP_TRUE:
31592 /* Data dependency; DEP_INSN writes a register that INSN reads
31593 some cycles later. */
31595 /* Separate a load from a narrower, dependent store. */
31596 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
31597 && GET_CODE (PATTERN (insn)) == SET
31598 && GET_CODE (PATTERN (dep_insn)) == SET
31599 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
31600 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
31601 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
31602 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
31603 return cost + 14;
31605 attr_type = get_attr_type (insn);
31607 switch (attr_type)
31609 case TYPE_JMPREG:
31610 /* Tell the first scheduling pass about the latency between
31611 a mtctr and bctr (and mtlr and br/blr). The first
31612 scheduling pass will not know about this latency since
31613 the mtctr instruction, which has the latency associated
31614 to it, will be generated by reload. */
31615 return 4;
31616 case TYPE_BRANCH:
31617 /* Leave some extra cycles between a compare and its
31618 dependent branch, to inhibit expensive mispredicts. */
31619 if ((rs6000_cpu_attr == CPU_PPC603
31620 || rs6000_cpu_attr == CPU_PPC604
31621 || rs6000_cpu_attr == CPU_PPC604E
31622 || rs6000_cpu_attr == CPU_PPC620
31623 || rs6000_cpu_attr == CPU_PPC630
31624 || rs6000_cpu_attr == CPU_PPC750
31625 || rs6000_cpu_attr == CPU_PPC7400
31626 || rs6000_cpu_attr == CPU_PPC7450
31627 || rs6000_cpu_attr == CPU_PPCE5500
31628 || rs6000_cpu_attr == CPU_PPCE6500
31629 || rs6000_cpu_attr == CPU_POWER4
31630 || rs6000_cpu_attr == CPU_POWER5
31631 || rs6000_cpu_attr == CPU_POWER7
31632 || rs6000_cpu_attr == CPU_POWER8
31633 || rs6000_cpu_attr == CPU_POWER9
31634 || rs6000_cpu_attr == CPU_CELL)
31635 && recog_memoized (dep_insn)
31636 && (INSN_CODE (dep_insn) >= 0))
31638 switch (get_attr_type (dep_insn))
31640 case TYPE_CMP:
31641 case TYPE_FPCOMPARE:
31642 case TYPE_CR_LOGICAL:
31643 case TYPE_DELAYED_CR:
31644 return cost + 2;
31645 case TYPE_EXTS:
31646 case TYPE_MUL:
31647 if (get_attr_dot (dep_insn) == DOT_YES)
31648 return cost + 2;
31649 else
31650 break;
31651 case TYPE_SHIFT:
31652 if (get_attr_dot (dep_insn) == DOT_YES
31653 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
31654 return cost + 2;
31655 else
31656 break;
31657 default:
31658 break;
31660 break;
31662 case TYPE_STORE:
31663 case TYPE_FPSTORE:
31664 if ((rs6000_cpu == PROCESSOR_POWER6)
31665 && recog_memoized (dep_insn)
31666 && (INSN_CODE (dep_insn) >= 0))
31669 if (GET_CODE (PATTERN (insn)) != SET)
31670 /* If this happens, we have to extend this to schedule
31671 optimally. Return default for now. */
31672 return cost;
31674 /* Adjust the cost for the case where the value written
31675 by a fixed point operation is used as the address
31676 gen value on a store. */
31677 switch (get_attr_type (dep_insn))
31679 case TYPE_LOAD:
31680 case TYPE_CNTLZ:
31682 if (! store_data_bypass_p (dep_insn, insn))
31683 return get_attr_sign_extend (dep_insn)
31684 == SIGN_EXTEND_YES ? 6 : 4;
31685 break;
31687 case TYPE_SHIFT:
31689 if (! store_data_bypass_p (dep_insn, insn))
31690 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31691 6 : 3;
31692 break;
31694 case TYPE_INTEGER:
31695 case TYPE_ADD:
31696 case TYPE_LOGICAL:
31697 case TYPE_EXTS:
31698 case TYPE_INSERT:
31700 if (! store_data_bypass_p (dep_insn, insn))
31701 return 3;
31702 break;
31704 case TYPE_STORE:
31705 case TYPE_FPLOAD:
31706 case TYPE_FPSTORE:
31708 if (get_attr_update (dep_insn) == UPDATE_YES
31709 && ! store_data_bypass_p (dep_insn, insn))
31710 return 3;
31711 break;
31713 case TYPE_MUL:
31715 if (! store_data_bypass_p (dep_insn, insn))
31716 return 17;
31717 break;
31719 case TYPE_DIV:
31721 if (! store_data_bypass_p (dep_insn, insn))
31722 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31723 break;
31725 default:
31726 break;
31729 break;
31731 case TYPE_LOAD:
31732 if ((rs6000_cpu == PROCESSOR_POWER6)
31733 && recog_memoized (dep_insn)
31734 && (INSN_CODE (dep_insn) >= 0))
31737 /* Adjust the cost for the case where the value written
31738 by a fixed point instruction is used within the address
31739 gen portion of a subsequent load(u)(x). */
31740 switch (get_attr_type (dep_insn))
31742 case TYPE_LOAD:
31743 case TYPE_CNTLZ:
31745 if (set_to_load_agen (dep_insn, insn))
31746 return get_attr_sign_extend (dep_insn)
31747 == SIGN_EXTEND_YES ? 6 : 4;
31748 break;
31750 case TYPE_SHIFT:
31752 if (set_to_load_agen (dep_insn, insn))
31753 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31754 6 : 3;
31755 break;
31757 case TYPE_INTEGER:
31758 case TYPE_ADD:
31759 case TYPE_LOGICAL:
31760 case TYPE_EXTS:
31761 case TYPE_INSERT:
31763 if (set_to_load_agen (dep_insn, insn))
31764 return 3;
31765 break;
31767 case TYPE_STORE:
31768 case TYPE_FPLOAD:
31769 case TYPE_FPSTORE:
31771 if (get_attr_update (dep_insn) == UPDATE_YES
31772 && set_to_load_agen (dep_insn, insn))
31773 return 3;
31774 break;
31776 case TYPE_MUL:
31778 if (set_to_load_agen (dep_insn, insn))
31779 return 17;
31780 break;
31782 case TYPE_DIV:
31784 if (set_to_load_agen (dep_insn, insn))
31785 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31786 break;
31788 default:
31789 break;
31792 break;
31794 case TYPE_FPLOAD:
31795 if ((rs6000_cpu == PROCESSOR_POWER6)
31796 && get_attr_update (insn) == UPDATE_NO
31797 && recog_memoized (dep_insn)
31798 && (INSN_CODE (dep_insn) >= 0)
31799 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
31800 return 2;
31802 default:
31803 break;
31806 /* Fall out to return default cost. */
31808 break;
31810 case REG_DEP_OUTPUT:
31811 /* Output dependency; DEP_INSN writes a register that INSN writes some
31812 cycles later. */
31813 if ((rs6000_cpu == PROCESSOR_POWER6)
31814 && recog_memoized (dep_insn)
31815 && (INSN_CODE (dep_insn) >= 0))
31817 attr_type = get_attr_type (insn);
31819 switch (attr_type)
31821 case TYPE_FP:
31822 case TYPE_FPSIMPLE:
31823 if (get_attr_type (dep_insn) == TYPE_FP
31824 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
31825 return 1;
31826 break;
31827 case TYPE_FPLOAD:
31828 if (get_attr_update (insn) == UPDATE_NO
31829 && get_attr_type (dep_insn) == TYPE_MFFGPR)
31830 return 2;
31831 break;
31832 default:
31833 break;
31836 /* Fall through, no cost for output dependency. */
31837 /* FALLTHRU */
31839 case REG_DEP_ANTI:
31840 /* Anti dependency; DEP_INSN reads a register that INSN writes some
31841 cycles later. */
31842 return 0;
31844 default:
31845 gcc_unreachable ();
31848 return cost;
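/* An example of the adjustments above (cycle counts taken from the
   cases, target assumed): on POWER6 a 32-bit divide whose result is
   used as the address-generation value of a store is charged 45
   cycles instead of the generic latency, steering the scheduler away
   from that pairing.  */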
31851 /* Debug version of rs6000_adjust_cost. */
31853 static int
31854 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
31855 int cost, unsigned int dw)
31857 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
31859 if (ret != cost)
31861 const char *dep;
31863 switch (dep_type)
31865 default: dep = "unknown dependency"; break;
31866 case REG_DEP_TRUE: dep = "data dependency"; break;
31867 case REG_DEP_OUTPUT: dep = "output dependency"; break;
31868 case REG_DEP_ANTI: dep = "anti dependency"; break;
31871 fprintf (stderr,
31872 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
31873 "%s, insn:\n", ret, cost, dep);
31875 debug_rtx (insn);
31878 return ret;
31881 /* The function returns true if INSN is microcoded.
31882 Return false otherwise. */
31884 static bool
31885 is_microcoded_insn (rtx_insn *insn)
31887 if (!insn || !NONDEBUG_INSN_P (insn)
31888 || GET_CODE (PATTERN (insn)) == USE
31889 || GET_CODE (PATTERN (insn)) == CLOBBER)
31890 return false;
31892 if (rs6000_cpu_attr == CPU_CELL)
31893 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
31895 if (rs6000_sched_groups
31896 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31898 enum attr_type type = get_attr_type (insn);
31899 if ((type == TYPE_LOAD
31900 && get_attr_update (insn) == UPDATE_YES
31901 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
31902 || ((type == TYPE_LOAD || type == TYPE_STORE)
31903 && get_attr_update (insn) == UPDATE_YES
31904 && get_attr_indexed (insn) == INDEXED_YES)
31905 || type == TYPE_MFCR)
31906 return true;
31909 return false;
31912 /* The function returns true if INSN is cracked into 2 instructions
31913 by the processor (and therefore occupies 2 issue slots). */
31915 static bool
31916 is_cracked_insn (rtx_insn *insn)
31918 if (!insn || !NONDEBUG_INSN_P (insn)
31919 || GET_CODE (PATTERN (insn)) == USE
31920 || GET_CODE (PATTERN (insn)) == CLOBBER)
31921 return false;
31923 if (rs6000_sched_groups
31924 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31926 enum attr_type type = get_attr_type (insn);
31927 if ((type == TYPE_LOAD
31928 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31929 && get_attr_update (insn) == UPDATE_NO)
31930 || (type == TYPE_LOAD
31931 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
31932 && get_attr_update (insn) == UPDATE_YES
31933 && get_attr_indexed (insn) == INDEXED_NO)
31934 || (type == TYPE_STORE
31935 && get_attr_update (insn) == UPDATE_YES
31936 && get_attr_indexed (insn) == INDEXED_NO)
31937 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
31938 && get_attr_update (insn) == UPDATE_YES)
31939 || type == TYPE_DELAYED_CR
31940 || (type == TYPE_EXTS
31941 && get_attr_dot (insn) == DOT_YES)
31942 || (type == TYPE_SHIFT
31943 && get_attr_dot (insn) == DOT_YES
31944 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
31945 || (type == TYPE_MUL
31946 && get_attr_dot (insn) == DOT_YES)
31947 || type == TYPE_DIV
31948 || (type == TYPE_INSERT
31949 && get_attr_size (insn) == SIZE_32))
31950 return true;
31953 return false;
31956 /* The function returns true if INSN can be issued only from
31957 the branch slot. */
31959 static bool
31960 is_branch_slot_insn (rtx_insn *insn)
31962 if (!insn || !NONDEBUG_INSN_P (insn)
31963 || GET_CODE (PATTERN (insn)) == USE
31964 || GET_CODE (PATTERN (insn)) == CLOBBER)
31965 return false;
31967 if (rs6000_sched_groups)
31969 enum attr_type type = get_attr_type (insn);
31970 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
31971 return true;
31972 return false;
31975 return false;
31978 /* The function returns true if OUT_INSN sets a value that is
31979 used in the address generation computation of IN_INSN. */
31980 static bool
31981 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
31983 rtx out_set, in_set;
31985 /* For performance reasons, only handle the simple case where
31986 both insns are a single_set. */
31987 out_set = single_set (out_insn);
31988 if (out_set)
31990 in_set = single_set (in_insn);
31991 if (in_set)
31992 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
31995 return false;
31998 /* Try to determine base/offset/size parts of the given MEM.
31999 Return true if successful, false if the values could not all
32000 be determined.
32002 This function only looks for REG or REG+CONST address forms.
32003 REG+REG address form will return false. */
32005 static bool
32006 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
32007 HOST_WIDE_INT *size)
32009 rtx addr_rtx;
32010 if (MEM_SIZE_KNOWN_P (mem))
32011 *size = MEM_SIZE (mem);
32012 else
32013 return false;
32015 addr_rtx = (XEXP (mem, 0));
32016 if (GET_CODE (addr_rtx) == PRE_MODIFY)
32017 addr_rtx = XEXP (addr_rtx, 1);
32019 *offset = 0;
32020 while (GET_CODE (addr_rtx) == PLUS
32021 && CONST_INT_P (XEXP (addr_rtx, 1)))
32023 *offset += INTVAL (XEXP (addr_rtx, 1));
32024 addr_rtx = XEXP (addr_rtx, 0);
32026 if (!REG_P (addr_rtx))
32027 return false;
32029 *base = addr_rtx;
32030 return true;
32033 /* Return true if the target storage location of MEM1 is
32034 adjacent to the target storage location of MEM2. */
32037 static bool
32038 adjacent_mem_locations (rtx mem1, rtx mem2)
32040 rtx reg1, reg2;
32041 HOST_WIDE_INT off1, size1, off2, size2;
32043 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32044 && get_memref_parts (mem2, &reg2, &off2, &size2))
32045 return ((REGNO (reg1) == REGNO (reg2))
32046 && ((off1 + size1 == off2)
32047 || (off2 + size2 == off1)));
32049 return false;
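/* A small example (values invented): for two 8-byte MEMs at
   (plus (reg r9) (const_int 8)) and (plus (reg r9) (const_int 16)),
   get_memref_parts yields base r9 with offsets 8 and 16, so
   adjacent_mem_locations returns true (8 + 8 == 16); had the second
   offset been 12, the pair would instead satisfy
   mem_locations_overlap below.  */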
32052 /* This function returns true if it can be determined that the two MEM
32053 locations overlap by at least 1 byte based on base reg/offset/size. */
32055 static bool
32056 mem_locations_overlap (rtx mem1, rtx mem2)
32058 rtx reg1, reg2;
32059 HOST_WIDE_INT off1, size1, off2, size2;
32061 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32062 && get_memref_parts (mem2, &reg2, &off2, &size2))
32063 return ((REGNO (reg1) == REGNO (reg2))
32064 && (((off1 <= off2) && (off1 + size1 > off2))
32065 || ((off2 <= off1) && (off2 + size2 > off1))));
32067 return false;
32070 /* A C statement (sans semicolon) to update the integer scheduling
32071 priority INSN_PRIORITY (INSN). Increase the priority to execute the
32072 INSN earlier, reduce the priority to execute INSN later. Do not
32073 define this macro if you do not need to adjust the scheduling
32074 priorities of insns. */
32076 static int
32077 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
32079 rtx load_mem, str_mem;
32080 /* On machines (like the 750) which have asymmetric integer units,
32081 where one integer unit can do multiply and divides and the other
32082 can't, reduce the priority of multiply/divide so it is scheduled
32083 before other integer operations. */
32085 #if 0
32086 if (! INSN_P (insn))
32087 return priority;
32089 if (GET_CODE (PATTERN (insn)) == USE)
32090 return priority;
32092 switch (rs6000_cpu_attr) {
32093 case CPU_PPC750:
32094 switch (get_attr_type (insn))
32096 default:
32097 break;
32099 case TYPE_MUL:
32100 case TYPE_DIV:
32101 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
32102 priority, priority);
32103 if (priority >= 0 && priority < 0x01000000)
32104 priority >>= 3;
32105 break;
32108 #endif
32110 if (insn_must_be_first_in_group (insn)
32111 && reload_completed
32112 && current_sched_info->sched_max_insns_priority
32113 && rs6000_sched_restricted_insns_priority)
32116 /* Prioritize insns that can be dispatched only in the first
32117 dispatch slot. */
32118 if (rs6000_sched_restricted_insns_priority == 1)
32119 /* Attach highest priority to insn. This means that in
32120 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
32121 precede 'priority' (critical path) considerations. */
32122 return current_sched_info->sched_max_insns_priority;
32123 else if (rs6000_sched_restricted_insns_priority == 2)
32124 /* Increase priority of insn by a minimal amount. This means that in
32125 haifa-sched.c:ready_sort(), only 'priority' (critical path)
32126 considerations precede dispatch-slot restriction considerations. */
32127 return (priority + 1);
32130 if (rs6000_cpu == PROCESSOR_POWER6
32131 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
32132 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
32133 /* Attach highest priority to insn if the scheduler has just issued two
32134 stores and this instruction is a load, or two loads and this instruction
32135 is a store. Power6 wants loads and stores scheduled alternately
32136 when possible */
32137 return current_sched_info->sched_max_insns_priority;
32139 return priority;
32142 /* Return true if the instruction is nonpipelined on the Cell. */
32143 static bool
32144 is_nonpipeline_insn (rtx_insn *insn)
32146 enum attr_type type;
32147 if (!insn || !NONDEBUG_INSN_P (insn)
32148 || GET_CODE (PATTERN (insn)) == USE
32149 || GET_CODE (PATTERN (insn)) == CLOBBER)
32150 return false;
32152 type = get_attr_type (insn);
32153 if (type == TYPE_MUL
32154 || type == TYPE_DIV
32155 || type == TYPE_SDIV
32156 || type == TYPE_DDIV
32157 || type == TYPE_SSQRT
32158 || type == TYPE_DSQRT
32159 || type == TYPE_MFCR
32160 || type == TYPE_MFCRF
32161 || type == TYPE_MFJMPR)
32163 return true;
32165 return false;
32169 /* Return how many instructions the machine can issue per cycle. */
32171 static int
32172 rs6000_issue_rate (void)
32174 /* Unless scheduling for register pressure, use issue rate of 1 for
32175 first scheduling pass to decrease degradation. */
32176 if (!reload_completed && !flag_sched_pressure)
32177 return 1;
32179 switch (rs6000_cpu_attr) {
32180 case CPU_RS64A:
32181 case CPU_PPC601: /* ? */
32182 case CPU_PPC7450:
32183 return 3;
32184 case CPU_PPC440:
32185 case CPU_PPC603:
32186 case CPU_PPC750:
32187 case CPU_PPC7400:
32188 case CPU_PPC8540:
32189 case CPU_PPC8548:
32190 case CPU_CELL:
32191 case CPU_PPCE300C2:
32192 case CPU_PPCE300C3:
32193 case CPU_PPCE500MC:
32194 case CPU_PPCE500MC64:
32195 case CPU_PPCE5500:
32196 case CPU_PPCE6500:
32197 case CPU_TITAN:
32198 return 2;
32199 case CPU_PPC476:
32200 case CPU_PPC604:
32201 case CPU_PPC604E:
32202 case CPU_PPC620:
32203 case CPU_PPC630:
32204 return 4;
32205 case CPU_POWER4:
32206 case CPU_POWER5:
32207 case CPU_POWER6:
32208 case CPU_POWER7:
32209 return 5;
32210 case CPU_POWER8:
32211 return 7;
32212 case CPU_POWER9:
32213 return 6;
32214 default:
32215 return 1;
32219 /* Return how many instructions to look ahead for better insn
32220 scheduling. */
32222 static int
32223 rs6000_use_sched_lookahead (void)
32225 switch (rs6000_cpu_attr)
32227 case CPU_PPC8540:
32228 case CPU_PPC8548:
32229 return 4;
32231 case CPU_CELL:
32232 return (reload_completed ? 8 : 0);
32234 default:
32235 return 0;
32239 /* We are choosing insn from the ready queue. Return zero if INSN can be
32240 chosen. */
32241 static int
32242 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
32244 if (ready_index == 0)
32245 return 0;
32247 if (rs6000_cpu_attr != CPU_CELL)
32248 return 0;
32250 gcc_assert (insn != NULL_RTX && INSN_P (insn));
32252 if (!reload_completed
32253 || is_nonpipeline_insn (insn)
32254 || is_microcoded_insn (insn))
32255 return 1;
32257 return 0;
32260 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
32261 and return true. */
32263 static bool
32264 find_mem_ref (rtx pat, rtx *mem_ref)
32266 const char * fmt;
32267 int i, j;
32269 /* stack_tie does not produce any real memory traffic. */
32270 if (tie_operand (pat, VOIDmode))
32271 return false;
32273 if (GET_CODE (pat) == MEM)
32275 *mem_ref = pat;
32276 return true;
32279 /* Recursively process the pattern. */
32280 fmt = GET_RTX_FORMAT (GET_CODE (pat));
32282 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
32284 if (fmt[i] == 'e')
32286 if (find_mem_ref (XEXP (pat, i), mem_ref))
32287 return true;
32289 else if (fmt[i] == 'E')
32290 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
32292 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
32293 return true;
32297 return false;
32300 /* Determine if PAT is a PATTERN of a load insn. */
32302 static bool
32303 is_load_insn1 (rtx pat, rtx *load_mem)
32305 if (!pat)
32306 return false;
32308 if (GET_CODE (pat) == SET)
32309 return find_mem_ref (SET_SRC (pat), load_mem);
32311 if (GET_CODE (pat) == PARALLEL)
32313 int i;
32315 for (i = 0; i < XVECLEN (pat, 0); i++)
32316 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
32317 return true;
32320 return false;
32323 /* Determine if INSN loads from memory. */
32325 static bool
32326 is_load_insn (rtx insn, rtx *load_mem)
32328 if (!insn || !INSN_P (insn))
32329 return false;
32331 if (CALL_P (insn))
32332 return false;
32334 return is_load_insn1 (PATTERN (insn), load_mem);
32337 /* Determine if PAT is a PATTERN of a store insn. */
32339 static bool
32340 is_store_insn1 (rtx pat, rtx *str_mem)
32342 if (!pat)
32343 return false;
32345 if (GET_CODE (pat) == SET)
32346 return find_mem_ref (SET_DEST (pat), str_mem);
32348 if (GET_CODE (pat) == PARALLEL)
32350 int i;
32352 for (i = 0; i < XVECLEN (pat, 0); i++)
32353 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
32354 return true;
32357 return false;
32360 /* Determine if INSN stores to memory. */
32362 static bool
32363 is_store_insn (rtx insn, rtx *str_mem)
32365 if (!insn || !INSN_P (insn))
32366 return false;
32368 return is_store_insn1 (PATTERN (insn), str_mem);
32371 /* Return whether TYPE is a Power9 pairable vector instruction type. */
32373 static bool
32374 is_power9_pairable_vec_type (enum attr_type type)
32376 switch (type)
32378 case TYPE_VECSIMPLE:
32379 case TYPE_VECCOMPLEX:
32380 case TYPE_VECDIV:
32381 case TYPE_VECCMP:
32382 case TYPE_VECPERM:
32383 case TYPE_VECFLOAT:
32384 case TYPE_VECFDIV:
32385 case TYPE_VECDOUBLE:
32386 return true;
32387 default:
32388 break;
32390 return false;
32393 /* Returns whether the dependence between INSN and NEXT is considered
32394 costly by the given target. */
32396 static bool
32397 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
32399 rtx insn;
32400 rtx next;
32401 rtx load_mem, str_mem;
32403 /* If the flag is not enabled - no dependence is considered costly;
32404 allow all dependent insns in the same group.
32405 This is the most aggressive option. */
32406 if (rs6000_sched_costly_dep == no_dep_costly)
32407 return false;
32409 /* If the flag is set to 1 - a dependence is always considered costly;
32410 do not allow dependent instructions in the same group.
32411 This is the most conservative option. */
32412 if (rs6000_sched_costly_dep == all_deps_costly)
32413 return true;
32415 insn = DEP_PRO (dep);
32416 next = DEP_CON (dep);
32418 if (rs6000_sched_costly_dep == store_to_load_dep_costly
32419 && is_load_insn (next, &load_mem)
32420 && is_store_insn (insn, &str_mem))
32421 /* Prevent load after store in the same group. */
32422 return true;
32424 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
32425 && is_load_insn (next, &load_mem)
32426 && is_store_insn (insn, &str_mem)
32427 && DEP_TYPE (dep) == REG_DEP_TRUE
32428 && mem_locations_overlap(str_mem, load_mem))
32429 /* Prevent load after store in the same group if it is a true
32430 dependence. */
32431 return true;
32433 /* The flag is set to X; dependences with latency >= X are considered costly,
32434 and will not be scheduled in the same group. */
32435 if (rs6000_sched_costly_dep <= max_dep_latency
32436 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
32437 return true;
32439 return false;
32442 /* Return the next insn after INSN that is found before TAIL is reached,
32443 skipping any "non-active" insns - insns that will not actually occupy
32444 an issue slot. Return NULL_RTX if such an insn is not found. */
32446 static rtx_insn *
32447 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
32449 if (insn == NULL_RTX || insn == tail)
32450 return NULL;
32452 while (1)
32454 insn = NEXT_INSN (insn);
32455 if (insn == NULL_RTX || insn == tail)
32456 return NULL;
32458 if (CALL_P (insn)
32459 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
32460 || (NONJUMP_INSN_P (insn)
32461 && GET_CODE (PATTERN (insn)) != USE
32462 && GET_CODE (PATTERN (insn)) != CLOBBER
32463 && INSN_CODE (insn) != CODE_FOR_stack_tie))
32464 break;
32466 return insn;
32469 /* Do Power9 specific sched_reorder2 reordering of ready list. */
32471 static int
32472 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
32474 int pos;
32475 int i;
32476 rtx_insn *tmp;
32477 enum attr_type type;
32479 type = get_attr_type (last_scheduled_insn);
32481 /* Try to issue fixed point divides back-to-back in pairs so they will be
32482 routed to separate execution units and execute in parallel. */
32483 if (type == TYPE_DIV && divide_cnt == 0)
32485 /* First divide has been scheduled. */
32486 divide_cnt = 1;
32488 /* Scan the ready list looking for another divide, if found move it
32489 to the end of the list so it is chosen next. */
32490 pos = lastpos;
32491 while (pos >= 0)
32493 if (recog_memoized (ready[pos]) >= 0
32494 && get_attr_type (ready[pos]) == TYPE_DIV)
32496 tmp = ready[pos];
32497 for (i = pos; i < lastpos; i++)
32498 ready[i] = ready[i + 1];
32499 ready[lastpos] = tmp;
32500 break;
32502 pos--;
32505 else
32507 /* The last insn was the second divide or not a divide; reset the counter. */
32508 divide_cnt = 0;
32510 /* Power9 can execute 2 vector operations and 2 vector loads in a single
32511 cycle. So try to pair up and alternate groups of vector and vector
32512 load instructions.
32514 To aid this formation, a counter is maintained to keep track of
32515 vec/vecload insns issued. The value of vec_load_pendulum maintains
32516 the current state with the following values:
32518 0 : Initial state, no vec/vecload group has been started.
32520 -1 : 1 vector load has been issued and another has been found on
32521 the ready list and moved to the end.
32523 -2 : 2 vector loads have been issued and a vector operation has
32524 been found and moved to the end of the ready list.
32526 -3 : 2 vector loads and a vector insn have been issued and a
32527 vector operation has been found and moved to the end of the
32528 ready list.
32530 1 : 1 vector insn has been issued and another has been found and
32531 moved to the end of the ready list.
32533 2 : 2 vector insns have been issued and a vector load has been
32534 found and moved to the end of the ready list.
32536 3 : 2 vector insns and a vector load have been issued and another
32537 vector load has been found and moved to the end of the ready
32538 list. */
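/* For illustration: starting from state 0 a complete group walks
   0 -> -1 -> -2 -> -3 (vecload, vecload, vec, vec) or
   0 -> 1 -> 2 -> 3 (vec, vec, vecload, vecload), after which the
   code below resets the pendulum to 0.  */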
32539 if (type == TYPE_VECLOAD)
32541 /* Issued a vecload. */
32542 if (vec_load_pendulum == 0)
32544 /* We issued a single vecload, look for another and move it to
32545 the end of the ready list so it will be scheduled next.
32546 Set pendulum if found. */
32547 pos = lastpos;
32548 while (pos >= 0)
32550 if (recog_memoized (ready[pos]) >= 0
32551 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32553 tmp = ready[pos];
32554 for (i = pos; i < lastpos; i++)
32555 ready[i] = ready[i + 1];
32556 ready[lastpos] = tmp;
32557 vec_load_pendulum = -1;
32558 return cached_can_issue_more;
32560 pos--;
32563 else if (vec_load_pendulum == -1)
32565 /* This is the second vecload we've issued, search the ready
32566 list for a vector operation so we can try to schedule a
32567 pair of those next. If found move to the end of the ready
32568 list so it is scheduled next and set the pendulum. */
32569 pos = lastpos;
32570 while (pos >= 0)
32572 if (recog_memoized (ready[pos]) >= 0
32573 && is_power9_pairable_vec_type (
32574 get_attr_type (ready[pos])))
32576 tmp = ready[pos];
32577 for (i = pos; i < lastpos; i++)
32578 ready[i] = ready[i + 1];
32579 ready[lastpos] = tmp;
32580 vec_load_pendulum = -2;
32581 return cached_can_issue_more;
32583 pos--;
32586 else if (vec_load_pendulum == 2)
32588 /* Two vector ops have been issued and we've just issued a
32589 vecload, look for another vecload and move to end of ready
32590 list if found. */
32591 pos = lastpos;
32592 while (pos >= 0)
32594 if (recog_memoized (ready[pos]) >= 0
32595 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32597 tmp = ready[pos];
32598 for (i = pos; i < lastpos; i++)
32599 ready[i] = ready[i + 1];
32600 ready[lastpos] = tmp;
32601 /* Set pendulum so that next vecload will be seen as
32602 finishing a group, not start of one. */
32603 vec_load_pendulum = 3;
32604 return cached_can_issue_more;
32606 pos--;
32610 else if (is_power9_pairable_vec_type (type))
32612 /* Issued a vector operation. */
32613 if (vec_load_pendulum == 0)
32614 /* We issued a single vec op, look for another and move it
32615 to the end of the ready list so it will be scheduled next.
32616 Set pendulum if found. */
32618 pos = lastpos;
32619 while (pos >= 0)
32621 if (recog_memoized (ready[pos]) >= 0
32622 && is_power9_pairable_vec_type (
32623 get_attr_type (ready[pos])))
32625 tmp = ready[pos];
32626 for (i = pos; i < lastpos; i++)
32627 ready[i] = ready[i + 1];
32628 ready[lastpos] = tmp;
32629 vec_load_pendulum = 1;
32630 return cached_can_issue_more;
32632 pos--;
32635 else if (vec_load_pendulum == 1)
32637 /* This is the second vec op we've issued, search the ready
32638 list for a vecload operation so we can try to schedule a
32639 pair of those next. If found move to the end of the ready
32640 list so it is scheduled next and set the pendulum. */
32641 pos = lastpos;
32642 while (pos >= 0)
32644 if (recog_memoized (ready[pos]) >= 0
32645 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32647 tmp = ready[pos];
32648 for (i = pos; i < lastpos; i++)
32649 ready[i] = ready[i + 1];
32650 ready[lastpos] = tmp;
32651 vec_load_pendulum = 2;
32652 return cached_can_issue_more;
32654 pos--;
32657 else if (vec_load_pendulum == -2)
32659 /* Two vecload ops have been issued and we've just issued a
32660 vec op, look for another vec op and move to end of ready
32661 list if found. */
32662 pos = lastpos;
32663 while (pos >= 0)
32665 if (recog_memoized (ready[pos]) >= 0
32666 && is_power9_pairable_vec_type (
32667 get_attr_type (ready[pos])))
32669 tmp = ready[pos];
32670 for (i = pos; i < lastpos; i++)
32671 ready[i] = ready[i + 1];
32672 ready[lastpos] = tmp;
32673 /* Set pendulum so that next vec op will be seen as
32674 finishing a group, not start of one. */
32675 vec_load_pendulum = -3;
32676 return cached_can_issue_more;
32678 pos--;
32683 /* We've either finished a vec/vecload group, couldn't find an insn to
32684 continue the current group, or the last insn had nothing to do
32685 with a group. In any case, reset the pendulum. */
32686 vec_load_pendulum = 0;
32689 return cached_can_issue_more;
32692 /* We are about to begin issuing insns for this clock cycle. */
32694 static int
32695 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
32696 rtx_insn **ready ATTRIBUTE_UNUSED,
32697 int *pn_ready ATTRIBUTE_UNUSED,
32698 int clock_var ATTRIBUTE_UNUSED)
32700 int n_ready = *pn_ready;
32702 if (sched_verbose)
32703 fprintf (dump, "// rs6000_sched_reorder :\n");
32705 /* Reorder the ready list, if the second to last ready insn
32706 is a nonpipelined insn. */
32707 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
32709 if (is_nonpipeline_insn (ready[n_ready - 1])
32710 && (recog_memoized (ready[n_ready - 2]) > 0))
32711 /* Simply swap first two insns. */
32712 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
32715 if (rs6000_cpu == PROCESSOR_POWER6)
32716 load_store_pendulum = 0;
32718 return rs6000_issue_rate ();
32721 /* Like rs6000_sched_reorder, but called after issuing each insn. */
32723 static int
32724 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
32725 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
32727 if (sched_verbose)
32728 fprintf (dump, "// rs6000_sched_reorder2 :\n");
32730 /* For Power6, we need to handle some special cases to try and keep the
32731 store queue from overflowing and triggering expensive flushes.
32733 This code monitors how load and store instructions are being issued
32734 and skews the ready list one way or the other to increase the likelihood
32735 that a desired instruction is issued at the proper time.
32737 A couple of things are done. First, we maintain a "load_store_pendulum"
32738 to track the current state of load/store issue.
32740 - If the pendulum is at zero, then no loads or stores have been
32741 issued in the current cycle so we do nothing.
32743 - If the pendulum is 1, then a single load has been issued in this
32744 cycle and we attempt to locate another load in the ready list to
32745 issue with it.
32747 - If the pendulum is -2, then two stores have already been
32748 issued in this cycle, so we increase the priority of the first load
32749 in the ready list to increase its likelihood of being chosen first
32750 in the next cycle.
32752 - If the pendulum is -1, then a single store has been issued in this
32753 cycle and we attempt to locate another store in the ready list to
32754 issue with it, preferring a store to an adjacent memory location to
32755 facilitate store pairing in the store queue.
32757 - If the pendulum is 2, then two loads have already been
32758 issued in this cycle, so we increase the priority of the first store
32759 in the ready list to increase its likelihood of being chosen first
32760 in the next cycle.
32762 - If the pendulum < -2 or > 2, then do nothing.
32764 Note: This code covers the most common scenarios. There exist
32765 non-load/store instructions which make use of the LSU and which
32766 would need to be accounted for to strictly model the behavior
32767 of the machine. Those instructions are currently unaccounted
32768 for to help minimize the compile time overhead of this code. */
32770 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
32772 int pos;
32773 int i;
32774 rtx_insn *tmp;
32775 rtx load_mem, str_mem;
32777 if (is_store_insn (last_scheduled_insn, &str_mem))
32778 /* Issuing a store, swing the load_store_pendulum to the left */
32779 load_store_pendulum--;
32780 else if (is_load_insn (last_scheduled_insn, &load_mem))
32781 /* Issuing a load, swing the load_store_pendulum to the right */
32782 load_store_pendulum++;
32783 else
32784 return cached_can_issue_more;
32786 /* If the pendulum is balanced, or there is only one instruction on
32787 the ready list, then all is well, so return. */
32788 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
32789 return cached_can_issue_more;
32791 if (load_store_pendulum == 1)
32793 /* A load has been issued in this cycle. Scan the ready list
32794 for another load to issue with it */
32795 pos = *pn_ready - 1;
32797 while (pos >= 0)
32799 if (is_load_insn (ready[pos], &load_mem))
32801 /* Found a load. Move it to the head of the ready list,
32802 and adjust its priority so that it is more likely to
32803 stay there. */
32804 tmp = ready[pos];
32805 for (i = pos; i < *pn_ready - 1; i++)
32806 ready[i] = ready[i + 1];
32807 ready[*pn_ready-1] = tmp;
32809 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32810 INSN_PRIORITY (tmp)++;
32811 break;
32813 pos--;
32816 else if (load_store_pendulum == -2)
32818 /* Two stores have been issued in this cycle. Increase the
32819 priority of the first load in the ready list to favor it for
32820 issuing in the next cycle. */
32821 pos = *pn_ready - 1;
32823 while (pos >= 0)
32825 if (is_load_insn (ready[pos], &load_mem)
32826 && !sel_sched_p ()
32827 && INSN_PRIORITY_KNOWN (ready[pos]))
32829 INSN_PRIORITY (ready[pos])++;
32831 /* Adjust the pendulum to account for the fact that a load
32832 was found and increased in priority. This is to prevent
32833 increasing the priority of multiple loads */
32834 load_store_pendulum--;
32836 break;
32838 pos--;
32841 else if (load_store_pendulum == -1)
32843 /* A store has been issued in this cycle. Scan the ready list for
32844 another store to issue with it, preferring a store to an adjacent
32845 memory location */
32846 int first_store_pos = -1;
32848 pos = *pn_ready - 1;
32850 while (pos >= 0)
32852 if (is_store_insn (ready[pos], &str_mem))
32854 rtx str_mem2;
32855 /* Maintain the index of the first store found on the
32856 list */
32857 if (first_store_pos == -1)
32858 first_store_pos = pos;
32860 if (is_store_insn (last_scheduled_insn, &str_mem2)
32861 && adjacent_mem_locations (str_mem, str_mem2))
32863 /* Found an adjacent store. Move it to the head of the
32864 ready list, and adjust its priority so that it is
32865 more likely to stay there. */
32866 tmp = ready[pos];
32867 for (i = pos; i < *pn_ready - 1; i++)
32868 ready[i] = ready[i + 1];
32869 ready[*pn_ready-1] = tmp;
32871 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32872 INSN_PRIORITY (tmp)++;
32874 first_store_pos = -1;
32876 break;
32879 pos--;
32882 if (first_store_pos >= 0)
32884 /* An adjacent store wasn't found, but a non-adjacent store was,
32885 so move the non-adjacent store to the front of the ready
32886 list, and adjust its priority so that it is more likely to
32887 stay there. */
32888 tmp = ready[first_store_pos];
32889 for (i = first_store_pos; i < *pn_ready - 1; i++)
32890 ready[i] = ready[i + 1];
32891 ready[*pn_ready-1] = tmp;
32892 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32893 INSN_PRIORITY (tmp)++;
32896 else if (load_store_pendulum == 2)
32898 /* Two loads have been issued in this cycle. Increase the priority
32899 of the first store in the ready list to favor it for issuing in
32900 the next cycle. */
32901 pos = *pn_ready - 1;
32903 while (pos >= 0)
32905 if (is_store_insn (ready[pos], &str_mem)
32906 && !sel_sched_p ()
32907 && INSN_PRIORITY_KNOWN (ready[pos]))
32909 INSN_PRIORITY (ready[pos])++;
32911 /* Adjust the pendulum to account for the fact that a store
32912 was found and increased in priority. This is to prevent
32913 increasing the priority of multiple stores */
32914 load_store_pendulum++;
32916 break;
32918 pos--;
32923 /* Do Power9 dependent reordering if necessary. */
32924 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
32925 && recog_memoized (last_scheduled_insn) >= 0)
32926 return power9_sched_reorder2 (ready, *pn_ready - 1);
32928 return cached_can_issue_more;
32931 /* Return whether the presence of INSN causes a dispatch group termination
32932 of group WHICH_GROUP.
32934 If WHICH_GROUP == current_group, this function will return true if INSN
32935 causes the termination of the current group (i.e., the dispatch group to
32936 which INSN belongs). This means that INSN will be the last insn in the
32937 group it belongs to.
32939 If WHICH_GROUP == previous_group, this function will return true if INSN
32940 causes the termination of the previous group (i.e., the dispatch group that
32941 precedes the group to which INSN belongs). This means that INSN will be
32942 the first insn in the group it belongs to. */
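/* For example, an isync (TYPE_ISYNC) must be both the first and the last
   insn of its group on the processors handled below, so for it this
   function returns true for either WHICH_GROUP value: it terminates the
   previous group and forms a singleton group of its own.  */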
32944 static bool
32945 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
32947 bool first, last;
32949 if (! insn)
32950 return false;
32952 first = insn_must_be_first_in_group (insn);
32953 last = insn_must_be_last_in_group (insn);
32955 if (first && last)
32956 return true;
32958 if (which_group == current_group)
32959 return last;
32960 else if (which_group == previous_group)
32961 return first;
32963 return false;
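/* Return true if INSN is required to be the first insn of the dispatch
   group it belongs to on the current processor.  */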
32967 static bool
32968 insn_must_be_first_in_group (rtx_insn *insn)
32970 enum attr_type type;
32972 if (!insn
32973 || NOTE_P (insn)
32974 || DEBUG_INSN_P (insn)
32975 || GET_CODE (PATTERN (insn)) == USE
32976 || GET_CODE (PATTERN (insn)) == CLOBBER)
32977 return false;
32979 switch (rs6000_cpu)
32981 case PROCESSOR_POWER5:
32982 if (is_cracked_insn (insn))
32983 return true;
32984 /* FALLTHRU */
32985 case PROCESSOR_POWER4:
32986 if (is_microcoded_insn (insn))
32987 return true;
32989 if (!rs6000_sched_groups)
32990 return false;
32992 type = get_attr_type (insn);
32994 switch (type)
32996 case TYPE_MFCR:
32997 case TYPE_MFCRF:
32998 case TYPE_MTCR:
32999 case TYPE_DELAYED_CR:
33000 case TYPE_CR_LOGICAL:
33001 case TYPE_MTJMPR:
33002 case TYPE_MFJMPR:
33003 case TYPE_DIV:
33004 case TYPE_LOAD_L:
33005 case TYPE_STORE_C:
33006 case TYPE_ISYNC:
33007 case TYPE_SYNC:
33008 return true;
33009 default:
33010 break;
33012 break;
33013 case PROCESSOR_POWER6:
33014 type = get_attr_type (insn);
33016 switch (type)
33018 case TYPE_EXTS:
33019 case TYPE_CNTLZ:
33020 case TYPE_TRAP:
33021 case TYPE_MUL:
33022 case TYPE_INSERT:
33023 case TYPE_FPCOMPARE:
33024 case TYPE_MFCR:
33025 case TYPE_MTCR:
33026 case TYPE_MFJMPR:
33027 case TYPE_MTJMPR:
33028 case TYPE_ISYNC:
33029 case TYPE_SYNC:
33030 case TYPE_LOAD_L:
33031 case TYPE_STORE_C:
33032 return true;
33033 case TYPE_SHIFT:
33034 if (get_attr_dot (insn) == DOT_NO
33035 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33036 return true;
33037 else
33038 break;
33039 case TYPE_DIV:
33040 if (get_attr_size (insn) == SIZE_32)
33041 return true;
33042 else
33043 break;
33044 case TYPE_LOAD:
33045 case TYPE_STORE:
33046 case TYPE_FPLOAD:
33047 case TYPE_FPSTORE:
33048 if (get_attr_update (insn) == UPDATE_YES)
33049 return true;
33050 else
33051 break;
33052 default:
33053 break;
33055 break;
33056 case PROCESSOR_POWER7:
33057 type = get_attr_type (insn);
33059 switch (type)
33061 case TYPE_CR_LOGICAL:
33062 case TYPE_MFCR:
33063 case TYPE_MFCRF:
33064 case TYPE_MTCR:
33065 case TYPE_DIV:
33066 case TYPE_ISYNC:
33067 case TYPE_LOAD_L:
33068 case TYPE_STORE_C:
33069 case TYPE_MFJMPR:
33070 case TYPE_MTJMPR:
33071 return true;
33072 case TYPE_MUL:
33073 case TYPE_SHIFT:
33074 case TYPE_EXTS:
33075 if (get_attr_dot (insn) == DOT_YES)
33076 return true;
33077 else
33078 break;
33079 case TYPE_LOAD:
33080 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33081 || get_attr_update (insn) == UPDATE_YES)
33082 return true;
33083 else
33084 break;
33085 case TYPE_STORE:
33086 case TYPE_FPLOAD:
33087 case TYPE_FPSTORE:
33088 if (get_attr_update (insn) == UPDATE_YES)
33089 return true;
33090 else
33091 break;
33092 default:
33093 break;
33095 break;
33096 case PROCESSOR_POWER8:
33097 type = get_attr_type (insn);
33099 switch (type)
33101 case TYPE_CR_LOGICAL:
33102 case TYPE_DELAYED_CR:
33103 case TYPE_MFCR:
33104 case TYPE_MFCRF:
33105 case TYPE_MTCR:
33106 case TYPE_SYNC:
33107 case TYPE_ISYNC:
33108 case TYPE_LOAD_L:
33109 case TYPE_STORE_C:
33110 case TYPE_VECSTORE:
33111 case TYPE_MFJMPR:
33112 case TYPE_MTJMPR:
33113 return true;
33114 case TYPE_SHIFT:
33115 case TYPE_EXTS:
33116 case TYPE_MUL:
33117 if (get_attr_dot (insn) == DOT_YES)
33118 return true;
33119 else
33120 break;
33121 case TYPE_LOAD:
33122 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33123 || get_attr_update (insn) == UPDATE_YES)
33124 return true;
33125 else
33126 break;
33127 case TYPE_STORE:
33128 if (get_attr_update (insn) == UPDATE_YES
33129 && get_attr_indexed (insn) == INDEXED_YES)
33130 return true;
33131 else
33132 break;
33133 default:
33134 break;
33136 break;
33137 default:
33138 break;
33141 return false;
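/* Return true if INSN is required to be the last insn of the dispatch
   group it belongs to on the current processor.  */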
33144 static bool
33145 insn_must_be_last_in_group (rtx_insn *insn)
33147 enum attr_type type;
33149 if (!insn
33150 || NOTE_P (insn)
33151 || DEBUG_INSN_P (insn)
33152 || GET_CODE (PATTERN (insn)) == USE
33153 || GET_CODE (PATTERN (insn)) == CLOBBER)
33154 return false;
33156 switch (rs6000_cpu)
    {
33157 case PROCESSOR_POWER4:
33158 case PROCESSOR_POWER5:
33159 if (is_microcoded_insn (insn))
33160 return true;
33162 if (is_branch_slot_insn (insn))
33163 return true;
33165 break;
33166 case PROCESSOR_POWER6:
33167 type = get_attr_type (insn);
33169 switch (type)
33171 case TYPE_EXTS:
33172 case TYPE_CNTLZ:
33173 case TYPE_TRAP:
33174 case TYPE_MUL:
33175 case TYPE_FPCOMPARE:
33176 case TYPE_MFCR:
33177 case TYPE_MTCR:
33178 case TYPE_MFJMPR:
33179 case TYPE_MTJMPR:
33180 case TYPE_ISYNC:
33181 case TYPE_SYNC:
33182 case TYPE_LOAD_L:
33183 case TYPE_STORE_C:
33184 return true;
33185 case TYPE_SHIFT:
33186 if (get_attr_dot (insn) == DOT_NO
33187 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33188 return true;
33189 else
33190 break;
33191 case TYPE_DIV:
33192 if (get_attr_size (insn) == SIZE_32)
33193 return true;
33194 else
33195 break;
33196 default:
33197 break;
33199 break;
33200 case PROCESSOR_POWER7:
33201 type = get_attr_type (insn);
33203 switch (type)
33205 case TYPE_ISYNC:
33206 case TYPE_SYNC:
33207 case TYPE_LOAD_L:
33208 case TYPE_STORE_C:
33209 return true;
33210 case TYPE_LOAD:
33211 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33212 && get_attr_update (insn) == UPDATE_YES)
33213 return true;
33214 else
33215 break;
33216 case TYPE_STORE:
33217 if (get_attr_update (insn) == UPDATE_YES
33218 && get_attr_indexed (insn) == INDEXED_YES)
33219 return true;
33220 else
33221 break;
33222 default:
33223 break;
33225 break;
33226 case PROCESSOR_POWER8:
33227 type = get_attr_type (insn);
33229 switch (type)
33231 case TYPE_MFCR:
33232 case TYPE_MTCR:
33233 case TYPE_ISYNC:
33234 case TYPE_SYNC:
33235 case TYPE_LOAD_L:
33236 case TYPE_STORE_C:
33237 return true;
33238 case TYPE_LOAD:
33239 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33240 && get_attr_update (insn) == UPDATE_YES)
33241 return true;
33242 else
33243 break;
33244 case TYPE_STORE:
33245 if (get_attr_update (insn) == UPDATE_YES
33246 && get_attr_indexed (insn) == INDEXED_YES)
33247 return true;
33248 else
33249 break;
33250 default:
33251 break;
33253 break;
33254 default:
33255 break;
33258 return false;
33261 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
33262 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
33264 static bool
33265 is_costly_group (rtx *group_insns, rtx next_insn)
33267 int i;
33268 int issue_rate = rs6000_issue_rate ();
33270 for (i = 0; i < issue_rate; i++)
33272 sd_iterator_def sd_it;
33273 dep_t dep;
33274 rtx insn = group_insns[i];
33276 if (!insn)
33277 continue;
33279 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
33281 rtx next = DEP_CON (dep);
33283 if (next == next_insn
33284 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
33285 return true;
33289 return false;
33292 /* Helper for the function redefine_groups.
33293 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
33294 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
33295 to keep it "far" (in a separate group) from GROUP_INSNS, following
33296 one of the following schemes, depending on the value of the flag
33297 -minsert-sched-nops=X:
33298 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
33299 in order to force NEXT_INSN into a separate group.
33300 (2) X < sched_finish_regroup_exact: insert exactly X nops.
33301 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
33302 insertion (has a group just ended, how many vacant issue slots remain in the
33303 last group, and how many dispatch groups were encountered so far). */
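/* A sketch of the two schemes, assuming issue_rate == 4 and a group whose
   first two slots are already filled when a costly dependence forces
   NEXT_INSN away:

     scheme (1): insn1 insn2 nop nop | next_insn ...
                 (pad out the current group, so next_insn starts a new one)
     scheme (2), X == 1: insn1 insn2 nop next_insn ...
                 (exactly X nops of distance; a group boundary is not
                 guaranteed).  */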
33305 static int
33306 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
33307 rtx_insn *next_insn, bool *group_end, int can_issue_more,
33308 int *group_count)
33310 rtx nop;
33311 bool force;
33312 int issue_rate = rs6000_issue_rate ();
33313 bool end = *group_end;
33314 int i;
33316 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
33317 return can_issue_more;
33319 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
33320 return can_issue_more;
33322 force = is_costly_group (group_insns, next_insn);
33323 if (!force)
33324 return can_issue_more;
33326 if (sched_verbose > 6)
33327 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
33328 *group_count ,can_issue_more);
33330 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
33332 if (*group_end)
33333 can_issue_more = 0;
33335 /* Since only a branch can be issued in the last issue_slot, it is
33336 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
33337 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
33338 in this case the last nop will start a new group and the branch
33339 will be forced to the new group. */
33340 if (can_issue_more && !is_branch_slot_insn (next_insn))
33341 can_issue_more--;
33343 /* Do we have a special group ending nop? */
33344 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
33345 || rs6000_cpu_attr == CPU_POWER8)
33347 nop = gen_group_ending_nop ();
33348 emit_insn_before (nop, next_insn);
33349 can_issue_more = 0;
33351 else
33352 while (can_issue_more > 0)
33354 nop = gen_nop ();
33355 emit_insn_before (nop, next_insn);
33356 can_issue_more--;
33359 *group_end = true;
33360 return 0;
33363 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
33365 int n_nops = rs6000_sched_insert_nops;
33367 /* Nops can't be issued from the branch slot, so the effective
33368 issue_rate for nops is 'issue_rate - 1'. */
33369 if (can_issue_more == 0)
33370 can_issue_more = issue_rate;
33371 can_issue_more--;
33372 if (can_issue_more == 0)
33374 can_issue_more = issue_rate - 1;
33375 (*group_count)++;
33376 end = true;
33377 for (i = 0; i < issue_rate; i++)
33379 group_insns[i] = 0;
33383 while (n_nops > 0)
33385 nop = gen_nop ();
33386 emit_insn_before (nop, next_insn);
33387 if (can_issue_more == issue_rate - 1) /* new group begins */
33388 end = false;
33389 can_issue_more--;
33390 if (can_issue_more == 0)
33392 can_issue_more = issue_rate - 1;
33393 (*group_count)++;
33394 end = true;
33395 for (i = 0; i < issue_rate; i++)
33397 group_insns[i] = 0;
33400 n_nops--;
33403 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
33404 can_issue_more++;
33406 /* Is next_insn going to start a new group? */
33407 *group_end
33408 = (end
33409 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33410 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33411 || (can_issue_more < issue_rate
33412 && insn_terminates_group_p (next_insn, previous_group)));
33413 if (*group_end && end)
33414 (*group_count)--;
33416 if (sched_verbose > 6)
33417 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
33418 *group_count, can_issue_more);
33419 return can_issue_more;
33422 return can_issue_more;
33425 /* This function tries to sync the dispatch groups that the compiler "sees"
33426 with the dispatch groups that the processor dispatcher is expected to
33427 form in practice. It tries to achieve this synchronization by forcing the
33428 estimated processor grouping on the compiler (as opposed to the function
33429 'pad_groups', which tries to force the scheduler's grouping on the processor).
33431 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
33432 examines the (estimated) dispatch groups that will be formed by the processor
33433 dispatcher. It marks these group boundaries to reflect the estimated
33434 processor grouping, overriding the grouping that the scheduler had marked.
33435 Depending on the value of the flag '-minsert-sched-nops' this function can
33436 force certain insns into separate groups or force a certain distance between
33437 them by inserting nops, for example, if there exists a "costly dependence"
33438 between the insns.
33440 The function estimates the group boundaries that the processor will form as
33441 follows: It keeps track of how many vacant issue slots are available after
33442 each insn. A subsequent insn will start a new group if one of the following
33443 4 cases applies:
33444 - no more vacant issue slots remain in the current dispatch group.
33445 - only the last issue slot, which is the branch slot, is vacant, but the next
33446 insn is not a branch.
33447 - only the last two or fewer issue slots, including the branch slot, are
33448 vacant, which means that a cracked insn (which occupies two issue slots)
33449 can't be issued in this group.
33450 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
33451 start a new group. */
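/* For example, with issue_rate == 5 (power4/power5): once three slots are
   filled, can_issue_more == 2, so a cracked next insn triggers the third
   case above (its two slots cannot fit once the branch slot is excluded);
   and an insn that insn_must_be_first_in_group recognizes, such as a sync,
   triggers the fourth case whenever the group is partially filled.  */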
33453 static int
33454 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33455 rtx_insn *tail)
33457 rtx_insn *insn, *next_insn;
33458 int issue_rate;
33459 int can_issue_more;
33460 int slot, i;
33461 bool group_end;
33462 int group_count = 0;
33463 rtx *group_insns;
33465 /* Initialize. */
33466 issue_rate = rs6000_issue_rate ();
33467 group_insns = XALLOCAVEC (rtx, issue_rate);
33468 for (i = 0; i < issue_rate; i++)
33470 group_insns[i] = 0;
33472 can_issue_more = issue_rate;
33473 slot = 0;
33474 insn = get_next_active_insn (prev_head_insn, tail);
33475 group_end = false;
33477 while (insn != NULL_RTX)
33479 slot = (issue_rate - can_issue_more);
33480 group_insns[slot] = insn;
33481 can_issue_more =
33482 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33483 if (insn_terminates_group_p (insn, current_group))
33484 can_issue_more = 0;
33486 next_insn = get_next_active_insn (insn, tail);
33487 if (next_insn == NULL_RTX)
33488 return group_count + 1;
33490 /* Is next_insn going to start a new group? */
33491 group_end
33492 = (can_issue_more == 0
33493 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33494 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33495 || (can_issue_more < issue_rate
33496 && insn_terminates_group_p (next_insn, previous_group)));
33498 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
33499 next_insn, &group_end, can_issue_more,
33500 &group_count);
33502 if (group_end)
33504 group_count++;
33505 can_issue_more = 0;
33506 for (i = 0; i < issue_rate; i++)
33508 group_insns[i] = 0;
33512 if (GET_MODE (next_insn) == TImode && can_issue_more)
33513 PUT_MODE (next_insn, VOIDmode);
33514 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
33515 PUT_MODE (next_insn, TImode);
33517 insn = next_insn;
33518 if (can_issue_more == 0)
33519 can_issue_more = issue_rate;
33520 } /* while */
33522 return group_count;
33525 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
33526 dispatch group boundaries that the scheduler had marked. Pad with nops
33527 any dispatch groups which have vacant issue slots, in order to force the
33528 scheduler's grouping on the processor dispatcher. The function
33529 returns the number of dispatch groups found. */
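/* For instance, at a boundary the scheduler marked in a four-slot group
   after only two insns were issued, a non-branch successor draws one nop
   (the vacant branch slot could not have been used anyway), while a branch
   successor draws two nops so that it cannot land in the current group's
   branch slot.  */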
33531 static int
33532 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33533 rtx_insn *tail)
33535 rtx_insn *insn, *next_insn;
33536 rtx nop;
33537 int issue_rate;
33538 int can_issue_more;
33539 int group_end;
33540 int group_count = 0;
33542 /* Initialize issue_rate. */
33543 issue_rate = rs6000_issue_rate ();
33544 can_issue_more = issue_rate;
33546 insn = get_next_active_insn (prev_head_insn, tail);
33547 next_insn = get_next_active_insn (insn, tail);
33549 while (insn != NULL_RTX)
33551 can_issue_more =
33552 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33554 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
33556 if (next_insn == NULL_RTX)
33557 break;
33559 if (group_end)
33561 /* If the scheduler had marked group termination at this location
33562 (between insn and next_insn), and neither insn nor next_insn will
33563 force group termination, pad the group with nops to force group
33564 termination. */
33565 if (can_issue_more
33566 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
33567 && !insn_terminates_group_p (insn, current_group)
33568 && !insn_terminates_group_p (next_insn, previous_group))
33570 if (!is_branch_slot_insn (next_insn))
33571 can_issue_more--;
33573 while (can_issue_more)
33575 nop = gen_nop ();
33576 emit_insn_before (nop, next_insn);
33577 can_issue_more--;
33581 can_issue_more = issue_rate;
33582 group_count++;
33585 insn = next_insn;
33586 next_insn = get_next_active_insn (insn, tail);
33589 return group_count;
33592 /* We're beginning a new block. Initialize data structures as necessary. */
33594 static void
33595 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
33596 int sched_verbose ATTRIBUTE_UNUSED,
33597 int max_ready ATTRIBUTE_UNUSED)
33599 last_scheduled_insn = NULL;
33600 load_store_pendulum = 0;
33601 divide_cnt = 0;
33602 vec_load_pendulum = 0;
33605 /* The following function is called at the end of scheduling BB.
33606 After reload, it inserts nops to enforce insn group boundaries. */
33608 static void
33609 rs6000_sched_finish (FILE *dump, int sched_verbose)
33611 int n_groups;
33613 if (sched_verbose)
33614 fprintf (dump, "=== Finishing schedule.\n");
33616 if (reload_completed && rs6000_sched_groups)
33618 /* Do not run the sched_finish hook when selective scheduling is enabled. */
33619 if (sel_sched_p ())
33620 return;
33622 if (rs6000_sched_insert_nops == sched_finish_none)
33623 return;
33625 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
33626 n_groups = pad_groups (dump, sched_verbose,
33627 current_sched_info->prev_head,
33628 current_sched_info->next_tail);
33629 else
33630 n_groups = redefine_groups (dump, sched_verbose,
33631 current_sched_info->prev_head,
33632 current_sched_info->next_tail);
33634 if (sched_verbose >= 6)
33636 fprintf (dump, "ngroups = %d\n", n_groups);
33637 print_rtl (dump, current_sched_info->prev_head);
33638 fprintf (dump, "Done finish_sched\n");
33643 struct rs6000_sched_context
33645 short cached_can_issue_more;
33646 rtx_insn *last_scheduled_insn;
33647 int load_store_pendulum;
33648 int divide_cnt;
33649 int vec_load_pendulum;
33652 typedef struct rs6000_sched_context rs6000_sched_context_def;
33653 typedef rs6000_sched_context_def *rs6000_sched_context_t;
33655 /* Allocate store for new scheduling context. */
33656 static void *
33657 rs6000_alloc_sched_context (void)
33659 return xmalloc (sizeof (rs6000_sched_context_def));
33662 /* If CLEAN_P is true, initialize _SC with clean data;
33663 otherwise initialize it from the global context. */
33664 static void
33665 rs6000_init_sched_context (void *_sc, bool clean_p)
33667 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33669 if (clean_p)
33671 sc->cached_can_issue_more = 0;
33672 sc->last_scheduled_insn = NULL;
33673 sc->load_store_pendulum = 0;
33674 sc->divide_cnt = 0;
33675 sc->vec_load_pendulum = 0;
33677 else
33679 sc->cached_can_issue_more = cached_can_issue_more;
33680 sc->last_scheduled_insn = last_scheduled_insn;
33681 sc->load_store_pendulum = load_store_pendulum;
33682 sc->divide_cnt = divide_cnt;
33683 sc->vec_load_pendulum = vec_load_pendulum;
33687 /* Sets the global scheduling context to the one pointed to by _SC. */
33688 static void
33689 rs6000_set_sched_context (void *_sc)
33691 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33693 gcc_assert (sc != NULL);
33695 cached_can_issue_more = sc->cached_can_issue_more;
33696 last_scheduled_insn = sc->last_scheduled_insn;
33697 load_store_pendulum = sc->load_store_pendulum;
33698 divide_cnt = sc->divide_cnt;
33699 vec_load_pendulum = sc->vec_load_pendulum;
33702 /* Free _SC. */
33703 static void
33704 rs6000_free_sched_context (void *_sc)
33706 gcc_assert (_sc != NULL);
33708 free (_sc);
33712 /* Length in bytes of the trampoline for entering a nested function. */
33714 int
33715 rs6000_trampoline_size (void)
33717 int ret = 0;
33719 switch (DEFAULT_ABI)
33721 default:
33722 gcc_unreachable ();
33724 case ABI_AIX:
33725 ret = (TARGET_32BIT) ? 12 : 24;
33726 break;
33728 case ABI_ELFv2:
33729 gcc_assert (!TARGET_32BIT);
33730 ret = 32;
33731 break;
33733 case ABI_DARWIN:
33734 case ABI_V4:
33735 ret = (TARGET_32BIT) ? 40 : 48;
33736 break;
33739 return ret;
33742 /* Emit RTL insns to initialize the variable parts of a trampoline.
33743 FNADDR is an RTX for the address of the function's pure code.
33744 CXT is an RTX for the static chain value for the function. */
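/* In the ABI_AIX case below, the trampoline itself is a 3-word function
   descriptor: with regsize == 8, the three emit_move_insn calls store the
   target's code address at offset 0, its TOC pointer at offset 8, and the
   static chain at offset 16 (with regsize == 4 the offsets halve).  */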
33746 static void
33747 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
33749 int regsize = (TARGET_32BIT) ? 4 : 8;
33750 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
33751 rtx ctx_reg = force_reg (Pmode, cxt);
33752 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
33754 switch (DEFAULT_ABI)
33756 default:
33757 gcc_unreachable ();
33759 /* Under AIX, just build the 3-word function descriptor. */
33760 case ABI_AIX:
33762 rtx fnmem, fn_reg, toc_reg;
33764 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33765 error ("You cannot take the address of a nested function if you use "
33766 "the -mno-pointers-to-nested-functions option.");
33768 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
33769 fn_reg = gen_reg_rtx (Pmode);
33770 toc_reg = gen_reg_rtx (Pmode);
33772 /* Macro to shorten the code expansions below. */
33773 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
33775 m_tramp = replace_equiv_address (m_tramp, addr);
33777 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
33778 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
33779 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
33780 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
33781 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
33783 # undef MEM_PLUS
33785 break;
33787 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
33788 case ABI_ELFv2:
33789 case ABI_DARWIN:
33790 case ABI_V4:
33791 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
33792 LCT_NORMAL, VOIDmode, 4,
33793 addr, Pmode,
33794 GEN_INT (rs6000_trampoline_size ()), SImode,
33795 fnaddr, Pmode,
33796 ctx_reg, Pmode);
33797 break;
33802 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
33803 identifier as an argument, so the front end shouldn't look it up. */
33805 static bool
33806 rs6000_attribute_takes_identifier_p (const_tree attr_id)
33808 return is_attribute_p ("altivec", attr_id);
33811 /* Handle the "altivec" attribute. The attribute may have
33812 arguments as follows:
33814 __attribute__((altivec(vector__)))
33815 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
33816 __attribute__((altivec(bool__))) (always followed by 'unsigned')
33818 and may appear more than once (e.g., 'vector bool char') in a
33819 given declaration. */
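/* For example, "vector unsigned int" reaches this handler as
     __attribute__((altivec(vector__))) unsigned int
   and the SImode entry of the 'v' case below maps it to
   unsigned_V4SI_type_node; a bool__ or pixel__ attribute processed after
   the type has already been vectorized hits the V4SImode/V8HImode
   entries instead.  */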
33821 static tree
33822 rs6000_handle_altivec_attribute (tree *node,
33823 tree name ATTRIBUTE_UNUSED,
33824 tree args,
33825 int flags ATTRIBUTE_UNUSED,
33826 bool *no_add_attrs)
33828 tree type = *node, result = NULL_TREE;
33829 machine_mode mode;
33830 int unsigned_p;
33831 char altivec_type
33832 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
33833 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
33834 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
33835 : '?');
33837 while (POINTER_TYPE_P (type)
33838 || TREE_CODE (type) == FUNCTION_TYPE
33839 || TREE_CODE (type) == METHOD_TYPE
33840 || TREE_CODE (type) == ARRAY_TYPE)
33841 type = TREE_TYPE (type);
33843 mode = TYPE_MODE (type);
33845 /* Check for invalid AltiVec type qualifiers. */
33846 if (type == long_double_type_node)
33847 error ("use of %<long double%> in AltiVec types is invalid");
33848 else if (type == boolean_type_node)
33849 error ("use of boolean types in AltiVec types is invalid");
33850 else if (TREE_CODE (type) == COMPLEX_TYPE)
33851 error ("use of %<complex%> in AltiVec types is invalid");
33852 else if (DECIMAL_FLOAT_MODE_P (mode))
33853 error ("use of decimal floating point types in AltiVec types is invalid");
33854 else if (!TARGET_VSX)
33856 if (type == long_unsigned_type_node || type == long_integer_type_node)
33858 if (TARGET_64BIT)
33859 error ("use of %<long%> in AltiVec types is invalid for "
33860 "64-bit code without -mvsx");
33861 else if (rs6000_warn_altivec_long)
33862 warning (0, "use of %<long%> in AltiVec types is deprecated; "
33863 "use %<int%>");
33865 else if (type == long_long_unsigned_type_node
33866 || type == long_long_integer_type_node)
33867 error ("use of %<long long%> in AltiVec types is invalid without "
33868 "-mvsx");
33869 else if (type == double_type_node)
33870 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
33873 switch (altivec_type)
33875 case 'v':
33876 unsigned_p = TYPE_UNSIGNED (type);
33877 switch (mode)
33879 case TImode:
33880 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
33881 break;
33882 case DImode:
33883 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
33884 break;
33885 case SImode:
33886 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
33887 break;
33888 case HImode:
33889 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
33890 break;
33891 case QImode:
33892 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
33893 break;
33894 case SFmode: result = V4SF_type_node; break;
33895 case DFmode: result = V2DF_type_node; break;
33896 /* If the user says 'vector int bool', we may be handed the 'bool'
33897 attribute _before_ the 'vector' attribute, and so select the
33898 proper type in the 'b' case below. */
33899 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
33900 case V2DImode: case V2DFmode:
33901 result = type;
33902 default: break;
33904 break;
33905 case 'b':
33906 switch (mode)
33908 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
33909 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
33910 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
33911 case QImode: case V16QImode: result = bool_V16QI_type_node;
33912 default: break;
33914 break;
33915 case 'p':
33916 switch (mode)
33918 case V8HImode: result = pixel_V8HI_type_node;
33919 default: break;
33921 default: break;
33924 /* Propagate qualifiers attached to the element type
33925 onto the vector type. */
33926 if (result && result != type && TYPE_QUALS (type))
33927 result = build_qualified_type (result, TYPE_QUALS (type));
33929 *no_add_attrs = true; /* No need to hang on to the attribute. */
33931 if (result)
33932 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
33934 return NULL_TREE;
33937 /* AltiVec defines four built-in scalar types that serve as vector
33938 elements; we must teach the compiler how to mangle them. */
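/* E.g. the element type of "vector bool int" (bool_int_type_node) is
   mangled below as "U6__booli", and that of "vector pixel"
   (pixel_type_node) as "u7__pixel"; any type not matched returns NULL,
   selecting the default C++ mangling.  */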
33940 static const char *
33941 rs6000_mangle_type (const_tree type)
33943 type = TYPE_MAIN_VARIANT (type);
33945 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33946 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33947 return NULL;
33949 if (type == bool_char_type_node) return "U6__boolc";
33950 if (type == bool_short_type_node) return "U6__bools";
33951 if (type == pixel_type_node) return "u7__pixel";
33952 if (type == bool_int_type_node) return "U6__booli";
33953 if (type == bool_long_type_node) return "U6__booll";
33955 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
33956 "g" for IBM extended double, no matter whether it is long double (using
33957 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
33958 if (TARGET_FLOAT128_TYPE)
33960 if (type == ieee128_float_type_node)
33961 return "U10__float128";
33963 if (type == ibm128_float_type_node)
33964 return "g";
33966 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
33967 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
33970 /* Mangle IBM extended float long double as `g' (__float128) on
33971 powerpc*-linux where long-double-64 previously was the default. */
33972 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
33973 && TARGET_ELF
33974 && TARGET_LONG_DOUBLE_128
33975 && !TARGET_IEEEQUAD)
33976 return "g";
33978 /* For all other types, use normal C++ mangling. */
33979 return NULL;
33982 /* Handle a "longcall" or "shortcall" attribute; arguments as in
33983 struct attribute_spec.handler. */
33985 static tree
33986 rs6000_handle_longcall_attribute (tree *node, tree name,
33987 tree args ATTRIBUTE_UNUSED,
33988 int flags ATTRIBUTE_UNUSED,
33989 bool *no_add_attrs)
33991 if (TREE_CODE (*node) != FUNCTION_TYPE
33992 && TREE_CODE (*node) != FIELD_DECL
33993 && TREE_CODE (*node) != TYPE_DECL)
33995 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33996 name);
33997 *no_add_attrs = true;
34000 return NULL_TREE;
34003 /* Set longcall attributes on all functions declared when
34004 rs6000_default_long_calls is true. */
34005 static void
34006 rs6000_set_default_type_attributes (tree type)
34008 if (rs6000_default_long_calls
34009 && (TREE_CODE (type) == FUNCTION_TYPE
34010 || TREE_CODE (type) == METHOD_TYPE))
34011 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
34012 NULL_TREE,
34013 TYPE_ATTRIBUTES (type));
34015 #if TARGET_MACHO
34016 darwin_set_default_type_attributes (type);
34017 #endif
34020 /* Return a reference suitable for calling a function with the
34021 longcall attribute. */
34023 rtx
34024 rs6000_longcall_ref (rtx call_ref)
34026 const char *call_name;
34027 tree node;
34029 if (GET_CODE (call_ref) != SYMBOL_REF)
34030 return call_ref;
34032 /* System V adds '.' to the internal name, so skip any leading dots. */
34033 call_name = XSTR (call_ref, 0);
34034 if (*call_name == '.')
34036 while (*call_name == '.')
34037 call_name++;
34039 node = get_identifier (call_name);
34040 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
34043 return force_reg (Pmode, call_ref);
34046 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
34047 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
34048 #endif
34050 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34051 struct attribute_spec.handler. */
34052 static tree
34053 rs6000_handle_struct_attribute (tree *node, tree name,
34054 tree args ATTRIBUTE_UNUSED,
34055 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34057 tree *type = NULL;
34058 if (DECL_P (*node))
34060 if (TREE_CODE (*node) == TYPE_DECL)
34061 type = &TREE_TYPE (*node);
34063 else
34064 type = node;
34066 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
34067 || TREE_CODE (*type) == UNION_TYPE)))
34069 warning (OPT_Wattributes, "%qE attribute ignored", name);
34070 *no_add_attrs = true;
34073 else if ((is_attribute_p ("ms_struct", name)
34074 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
34075 || ((is_attribute_p ("gcc_struct", name)
34076 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
34078 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
34079 name);
34080 *no_add_attrs = true;
34083 return NULL_TREE;
34086 static bool
34087 rs6000_ms_bitfield_layout_p (const_tree record_type)
34089 return ((TARGET_USE_MS_BITFIELD_LAYOUT
34090 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
34091 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
34094 #ifdef USING_ELFOS_H
34096 /* A get_unnamed_section callback, used for switching to toc_section. */
34098 static void
34099 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34101 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34102 && TARGET_MINIMAL_TOC)
34104 if (!toc_initialized)
34106 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34107 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34108 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
34109 fprintf (asm_out_file, "\t.tc ");
34110 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
34111 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34112 fprintf (asm_out_file, "\n");
34114 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34115 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34116 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34117 fprintf (asm_out_file, " = .+32768\n");
34118 toc_initialized = 1;
34120 else
34121 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34123 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34125 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34126 if (!toc_initialized)
34128 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34129 toc_initialized = 1;
34132 else
34134 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34135 if (!toc_initialized)
34137 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34138 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34139 fprintf (asm_out_file, " = .+32768\n");
34140 toc_initialized = 1;
34145 /* Implement TARGET_ASM_INIT_SECTIONS. */
34147 static void
34148 rs6000_elf_asm_init_sections (void)
34150 toc_section
34151 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
34153 sdata2_section
34154 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
34155 SDATA2_SECTION_ASM_OP);
34158 /* Implement TARGET_SELECT_RTX_SECTION. */
34160 static section *
34161 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
34162 unsigned HOST_WIDE_INT align)
34164 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34165 return toc_section;
34166 else
34167 return default_elf_select_rtx_section (mode, x, align);
34170 /* For a SYMBOL_REF, set generic flags and then perform some
34171 target-specific processing.
34173 When the AIX ABI is requested on a non-AIX system, replace the
34174 function name with the real name (with a leading .) rather than the
34175 function descriptor name. This saves a lot of overriding code to
34176 read the prefixes. */
34178 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
34179 static void
34180 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
34182 default_encode_section_info (decl, rtl, first);
34184 if (first
34185 && TREE_CODE (decl) == FUNCTION_DECL
34186 && !TARGET_AIX
34187 && DEFAULT_ABI == ABI_AIX)
34189 rtx sym_ref = XEXP (rtl, 0);
34190 size_t len = strlen (XSTR (sym_ref, 0));
34191 char *str = XALLOCAVEC (char, len + 2);
34192 str[0] = '.';
34193 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
34194 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
34198 static inline bool
34199 compare_section_name (const char *section, const char *templ)
34201 int len;
34203 len = strlen (templ);
34204 return (strncmp (section, templ, len) == 0
34205 && (section[len] == 0 || section[len] == '.'));
34208 bool
34209 rs6000_elf_in_small_data_p (const_tree decl)
34211 if (rs6000_sdata == SDATA_NONE)
34212 return false;
34214 /* We want to merge strings, so we never consider them small data. */
34215 if (TREE_CODE (decl) == STRING_CST)
34216 return false;
34218 /* Functions are never in the small data area. */
34219 if (TREE_CODE (decl) == FUNCTION_DECL)
34220 return false;
34222 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
34224 const char *section = DECL_SECTION_NAME (decl);
34225 if (compare_section_name (section, ".sdata")
34226 || compare_section_name (section, ".sdata2")
34227 || compare_section_name (section, ".gnu.linkonce.s")
34228 || compare_section_name (section, ".sbss")
34229 || compare_section_name (section, ".sbss2")
34230 || compare_section_name (section, ".gnu.linkonce.sb")
34231 || strcmp (section, ".PPC.EMB.sdata0") == 0
34232 || strcmp (section, ".PPC.EMB.sbss0") == 0)
34233 return true;
34235 else
34237 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
34239 if (size > 0
34240 && size <= g_switch_value
34241 /* If it's not public, and we're not going to reference it there,
34242 there's no need to put it in the small data section. */
34243 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
34244 return true;
34247 return false;
34250 #endif /* USING_ELFOS_H */
34252 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
34254 static bool
34255 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
34257 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
34260 /* Do not place thread-local symbols refs in the object blocks. */
34262 static bool
34263 rs6000_use_blocks_for_decl_p (const_tree decl)
34265 return !DECL_THREAD_LOCAL_P (decl);
34268 /* Return a REG that occurs in ADDR with coefficient 1.
34269 ADDR can be effectively incremented by incrementing REG.
34271 r0 is special and we must not select it as an address
34272 register by this routine since our caller will try to
34273 increment the returned register via an "la" instruction. */
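/* For example, for ADDR = (plus (reg r9) (const_int 16)) the result is
   r9; for (plus (plus (reg r9) (reg r0)) (const_int 16)) it is also r9,
   since the REGNO != 0 tests below refuse to return r0.  */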
34275 rtx
34276 find_addr_reg (rtx addr)
34278 while (GET_CODE (addr) == PLUS)
34280 if (GET_CODE (XEXP (addr, 0)) == REG
34281 && REGNO (XEXP (addr, 0)) != 0)
34282 addr = XEXP (addr, 0);
34283 else if (GET_CODE (XEXP (addr, 1)) == REG
34284 && REGNO (XEXP (addr, 1)) != 0)
34285 addr = XEXP (addr, 1);
34286 else if (CONSTANT_P (XEXP (addr, 0)))
34287 addr = XEXP (addr, 1);
34288 else if (CONSTANT_P (XEXP (addr, 1)))
34289 addr = XEXP (addr, 0);
34290 else
34291 gcc_unreachable ();
34293 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
34294 return addr;
34297 void
34298 rs6000_fatal_bad_address (rtx op)
34300 fatal_insn ("bad address", op);
34303 #if TARGET_MACHO
34305 typedef struct branch_island_d {
34306 tree function_name;
34307 tree label_name;
34308 int line_number;
34309 } branch_island;
34312 static vec<branch_island, va_gc> *branch_islands;
34314 /* Remember to generate a branch island for far calls to the given
34315 function. */
34317 static void
34318 add_compiler_branch_island (tree label_name, tree function_name,
34319 int line_number)
34321 branch_island bi = {function_name, label_name, line_number};
34322 vec_safe_push (branch_islands, bi);
34325 /* Generate far-jump branch islands for everything recorded in
34326 branch_islands. Invoked immediately after the last instruction of
34327 the epilogue has been emitted; the branch islands must be appended
34328 to, and contiguous with, the function body. Mach-O stubs are
34329 generated in machopic_output_stub(). */
34331 static void
34332 macho_branch_islands (void)
34334 char tmp_buf[512];
34336 while (!vec_safe_is_empty (branch_islands))
34338 branch_island *bi = &branch_islands->last ();
34339 const char *label = IDENTIFIER_POINTER (bi->label_name);
34340 const char *name = IDENTIFIER_POINTER (bi->function_name);
34341 char name_buf[512];
34342 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
34343 if (name[0] == '*' || name[0] == '&')
34344 strcpy (name_buf, name+1);
34345 else
34347 name_buf[0] = '_';
34348 strcpy (name_buf+1, name);
34350 strcpy (tmp_buf, "\n");
34351 strcat (tmp_buf, label);
34352 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34353 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34354 dbxout_stabd (N_SLINE, bi->line_number);
34355 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34356 if (flag_pic)
34358 if (TARGET_LINK_STACK)
34360 char name[32];
34361 get_ppc476_thunk_name (name);
34362 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
34363 strcat (tmp_buf, name);
34364 strcat (tmp_buf, "\n");
34365 strcat (tmp_buf, label);
34366 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34368 else
34370 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
34371 strcat (tmp_buf, label);
34372 strcat (tmp_buf, "_pic\n");
34373 strcat (tmp_buf, label);
34374 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34377 strcat (tmp_buf, "\taddis r11,r11,ha16(");
34378 strcat (tmp_buf, name_buf);
34379 strcat (tmp_buf, " - ");
34380 strcat (tmp_buf, label);
34381 strcat (tmp_buf, "_pic)\n");
34383 strcat (tmp_buf, "\tmtlr r0\n");
34385 strcat (tmp_buf, "\taddi r12,r11,lo16(");
34386 strcat (tmp_buf, name_buf);
34387 strcat (tmp_buf, " - ");
34388 strcat (tmp_buf, label);
34389 strcat (tmp_buf, "_pic)\n");
34391 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
34393 else
34395 strcat (tmp_buf, ":\nlis r12,hi16(");
34396 strcat (tmp_buf, name_buf);
34397 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
34398 strcat (tmp_buf, name_buf);
34399 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
34401 output_asm_insn (tmp_buf, 0);
34402 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34403 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34404 dbxout_stabd (N_SLINE, bi->line_number);
34405 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34406 branch_islands->pop ();
34410 /* NO_PREVIOUS_DEF checks whether the function name is already in the
34411 list of branch islands. */
34413 static int
34414 no_previous_def (tree function_name)
34416 branch_island *bi;
34417 unsigned ix;
34419 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34420 if (function_name == bi->function_name)
34421 return 0;
34422 return 1;
34425 /* GET_PREV_LABEL gets the label name from the previous definition of
34426 the function. */
34428 static tree
34429 get_prev_label (tree function_name)
34431 branch_island *bi;
34432 unsigned ix;
34434 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34435 if (function_name == bi->function_name)
34436 return bi->label_name;
34437 return NULL_TREE;
34440 /* INSN is either a function call or a millicode call. It may have an
34441 unconditional jump in its delay slot.
34443 CALL_DEST is the routine we are calling. */
34445 char *
34446 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
34447 int cookie_operand_number)
34449 static char buf[256];
34450 if (darwin_emit_branch_islands
34451 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
34452 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
34454 tree labelname;
34455 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
34457 if (no_previous_def (funname))
34459 rtx label_rtx = gen_label_rtx ();
34460 char *label_buf, temp_buf[256];
34461 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
34462 CODE_LABEL_NUMBER (label_rtx));
34463 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
34464 labelname = get_identifier (label_buf);
34465 add_compiler_branch_island (labelname, funname, insn_line (insn));
34467 else
34468 labelname = get_prev_label (funname);
34470 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
34471 instruction will reach 'foo', otherwise link as 'bl L42'".
34472 "L42" should be a 'branch island', that will do a far jump to
34473 'foo'. Branch islands are generated in
34474 macho_branch_islands(). */
34475 sprintf (buf, "jbsr %%z%d,%.246s",
34476 dest_operand_number, IDENTIFIER_POINTER (labelname));
34478 else
34479 sprintf (buf, "bl %%z%d", dest_operand_number);
34480 return buf;
34483 /* Generate PIC and indirect symbol stubs. */
34485 void
34486 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34488 unsigned int length;
34489 char *symbol_name, *lazy_ptr_name;
34490 char *local_label_0;
34491 static int label = 0;
34493 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34494 symb = (*targetm.strip_name_encoding) (symb);
34497 length = strlen (symb);
34498 symbol_name = XALLOCAVEC (char, length + 32);
34499 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34501 lazy_ptr_name = XALLOCAVEC (char, length + 32);
34502 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
34504 if (flag_pic == 2)
34505 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
34506 else
34507 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
34509 if (flag_pic == 2)
34511 fprintf (file, "\t.align 5\n");
34513 fprintf (file, "%s:\n", stub);
34514 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34516 label++;
34517 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
34518 sprintf (local_label_0, "\"L%011d$spb\"", label);
34520 fprintf (file, "\tmflr r0\n");
34521 if (TARGET_LINK_STACK)
34523 char name[32];
34524 get_ppc476_thunk_name (name);
34525 fprintf (file, "\tbl %s\n", name);
34526 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34528 else
34530 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
34531 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34533 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
34534 lazy_ptr_name, local_label_0);
34535 fprintf (file, "\tmtlr r0\n");
34536 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
34537 (TARGET_64BIT ? "ldu" : "lwzu"),
34538 lazy_ptr_name, local_label_0);
34539 fprintf (file, "\tmtctr r12\n");
34540 fprintf (file, "\tbctr\n");
34542 else
34544 fprintf (file, "\t.align 4\n");
34546 fprintf (file, "%s:\n", stub);
34547 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34549 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
34550 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
34551 (TARGET_64BIT ? "ldu" : "lwzu"),
34552 lazy_ptr_name);
34553 fprintf (file, "\tmtctr r12\n");
34554 fprintf (file, "\tbctr\n");
34557 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
34558 fprintf (file, "%s:\n", lazy_ptr_name);
34559 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34560 fprintf (file, "%sdyld_stub_binding_helper\n",
34561 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
34564 /* Legitimize PIC addresses. If the address is already
34565 position-independent, we return ORIG. Newly generated
34566 position-independent addresses go into a reg. This is REG if
34567 nonzero, otherwise we allocate register(s) as necessary. */
34569 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
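/* SMALL_INT accepts exactly the signed 16-bit range: for X == -0x8000,
   UINTVAL (X) + 0x8000 wraps to 0, and X == 0x7fff gives 0xffff, both
   below 0x10000; X == 0x8000 gives exactly 0x10000 and is rejected.  */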
34571 rtx
34572 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
34573 rtx reg)
34575 rtx base, offset;
34577 if (reg == NULL && ! reload_in_progress && ! reload_completed)
34578 reg = gen_reg_rtx (Pmode);
34580 if (GET_CODE (orig) == CONST)
34582 rtx reg_temp;
34584 if (GET_CODE (XEXP (orig, 0)) == PLUS
34585 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
34586 return orig;
34588 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
34590 /* Use a different reg for the intermediate value, as
34591 it will be marked UNCHANGING. */
34592 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
34593 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
34594 Pmode, reg_temp);
34595 offset =
34596 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
34597 Pmode, reg);
34599 if (GET_CODE (offset) == CONST_INT)
34601 if (SMALL_INT (offset))
34602 return plus_constant (Pmode, base, INTVAL (offset));
34603 else if (! reload_in_progress && ! reload_completed)
34604 offset = force_reg (Pmode, offset);
34605 else
34607 rtx mem = force_const_mem (Pmode, orig);
34608 return machopic_legitimize_pic_address (mem, Pmode, reg);
34611 return gen_rtx_PLUS (Pmode, base, offset);
34614 /* Fall back on generic machopic code. */
34615 return machopic_legitimize_pic_address (orig, mode, reg);
34618 /* Output a .machine directive for the Darwin assembler, and call
34619 the generic start_file routine. */
34621 static void
34622 rs6000_darwin_file_start (void)
34624 static const struct
34626 const char *arg;
34627 const char *name;
34628 HOST_WIDE_INT if_set;
34629 } mapping[] = {
34630 { "ppc64", "ppc64", MASK_64BIT },
34631 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
34632 { "power4", "ppc970", 0 },
34633 { "G5", "ppc970", 0 },
34634 { "7450", "ppc7450", 0 },
34635 { "7400", "ppc7400", MASK_ALTIVEC },
34636 { "G4", "ppc7400", 0 },
34637 { "750", "ppc750", 0 },
34638 { "740", "ppc750", 0 },
34639 { "G3", "ppc750", 0 },
34640 { "604e", "ppc604e", 0 },
34641 { "604", "ppc604", 0 },
34642 { "603e", "ppc603", 0 },
34643 { "603", "ppc603", 0 },
34644 { "601", "ppc601", 0 },
34645 { NULL, "ppc", 0 } };
34646 const char *cpu_id = "";
34647 size_t i;
34649 rs6000_file_start ();
34650 darwin_file_start ();
34652 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
34654 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
34655 cpu_id = rs6000_default_cpu;
34657 if (global_options_set.x_rs6000_cpu_index)
34658 cpu_id = processor_target_table[rs6000_cpu_index].name;
34660 /* Look through the mapping array. Pick the first name that either
34661 matches the argument, has a bit set in IF_SET that is also set
34662 in the target flags, or has a NULL name. */
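/* For instance, -mcpu=G5 resolves to ".machine ppc970", either via the
   "970" row (whose IF_SET bits a G5 target enables) or via the explicit
   "G5" alias; if nothing matches, the NULL sentinel row falls back to
   ".machine ppc".  */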
34664 i = 0;
34665 while (mapping[i].arg != NULL
34666 && strcmp (mapping[i].arg, cpu_id) != 0
34667 && (mapping[i].if_set & rs6000_isa_flags) == 0)
34668 i++;
34670 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
34673 #endif /* TARGET_MACHO */
34675 #if TARGET_ELF
34676 static int
34677 rs6000_elf_reloc_rw_mask (void)
34679 if (flag_pic)
34680 return 3;
34681 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34682 return 2;
34683 else
34684 return 0;
34687 /* Record an element in the table of global constructors. SYMBOL is
34688 a SYMBOL_REF of the function to be called; PRIORITY is a number
34689 between 0 and MAX_INIT_PRIORITY.
34691 This differs from default_named_section_asm_out_constructor in
34692 that we have special handling for -mrelocatable. */
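/* E.g. a constructor with priority 100 lands in section ".ctors.65435"
   (MAX_INIT_PRIORITY is 65535): the inverted suffix sorts late, and since
   .ctors entries are executed right to left, sorting late means running
   early, as a low priority number requires.  */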
34694 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
34695 static void
34696 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
34698 const char *section = ".ctors";
34699 char buf[18];
34701 if (priority != DEFAULT_INIT_PRIORITY)
34703 sprintf (buf, ".ctors.%.5u",
34704 /* Invert the numbering so the linker puts us in the proper
34705 order; constructors are run from right to left, and the
34706 linker sorts in increasing order. */
34707 MAX_INIT_PRIORITY - priority);
34708 section = buf;
34711 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34712 assemble_align (POINTER_SIZE);
34714 if (DEFAULT_ABI == ABI_V4
34715 && (TARGET_RELOCATABLE || flag_pic > 1))
34717 fputs ("\t.long (", asm_out_file);
34718 output_addr_const (asm_out_file, symbol);
34719 fputs (")@fixup\n", asm_out_file);
34721 else
34722 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34725 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
34726 static void
34727 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
34729 const char *section = ".dtors";
34730 char buf[18];
34732 if (priority != DEFAULT_INIT_PRIORITY)
34734 sprintf (buf, ".dtors.%.5u",
34735 /* Invert the numbering so the linker puts us in the proper
34736 order; constructors are run from right to left, and the
34737 linker sorts in increasing order. */
34738 MAX_INIT_PRIORITY - priority);
34739 section = buf;
34742 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34743 assemble_align (POINTER_SIZE);
34745 if (DEFAULT_ABI == ABI_V4
34746 && (TARGET_RELOCATABLE || flag_pic > 1))
34748 fputs ("\t.long (", asm_out_file);
34749 output_addr_const (asm_out_file, symbol);
34750 fputs (")@fixup\n", asm_out_file);
34752 else
34753 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34756 void
34757 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
34759 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
34761 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
34762 ASM_OUTPUT_LABEL (file, name);
34763 fputs (DOUBLE_INT_ASM_OP, file);
34764 rs6000_output_function_entry (file, name);
34765 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
34766 if (DOT_SYMBOLS)
34768 fputs ("\t.size\t", file);
34769 assemble_name (file, name);
34770 fputs (",24\n\t.type\t.", file);
34771 assemble_name (file, name);
34772 fputs (",@function\n", file);
34773 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
34775 fputs ("\t.globl\t.", file);
34776 assemble_name (file, name);
34777 putc ('\n', file);
34780 else
34781 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34782 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34783 rs6000_output_function_entry (file, name);
34784 fputs (":\n", file);
34785 return;
34788 if (DEFAULT_ABI == ABI_V4
34789 && (TARGET_RELOCATABLE || flag_pic > 1)
34790 && !TARGET_SECURE_PLT
34791 && (get_pool_size () != 0 || crtl->profile)
34792 && uses_TOC ())
34794 char buf[256];
34796 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34798 fprintf (file, "\t.long ");
34799 assemble_name (file, toc_label_name);
34800 need_toc_init = 1;
34801 putc ('-', file);
34802 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34803 assemble_name (file, buf);
34804 putc ('\n', file);
34807 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34808 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34810 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
34812 char buf[256];
34814 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34816 fprintf (file, "\t.quad .TOC.-");
34817 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34818 assemble_name (file, buf);
34819 putc ('\n', file);
34822 if (DEFAULT_ABI == ABI_AIX)
34824 const char *desc_name, *orig_name;
34826 orig_name = (*targetm.strip_name_encoding) (name);
34827 desc_name = orig_name;
34828 while (*desc_name == '.')
34829 desc_name++;
34831 if (TREE_PUBLIC (decl))
34832 fprintf (file, "\t.globl %s\n", desc_name);
34834 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34835 fprintf (file, "%s:\n", desc_name);
34836 fprintf (file, "\t.long %s\n", orig_name);
34837 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
34838 fputs ("\t.long 0\n", file);
34839 fprintf (file, "\t.previous\n");
34841 ASM_OUTPUT_LABEL (file, name);
34844 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
34845 static void
34846 rs6000_elf_file_end (void)
34848 #ifdef HAVE_AS_GNU_ATTRIBUTE
34849 /* ??? The value emitted depends on options active at file end.
34850 Assume anyone using #pragma or attributes that might change
34851 options knows what they are doing. */
34852 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
34853 && rs6000_passes_float)
34855 int fp;
34857 if (TARGET_DF_FPR | TARGET_DF_SPE)
34858 fp = 1;
34859 else if (TARGET_SF_FPR | TARGET_SF_SPE)
34860 fp = 3;
34861 else
34862 fp = 2;
34863 if (rs6000_passes_long_double)
34865 if (!TARGET_LONG_DOUBLE_128)
34866 fp |= 2 * 4;
34867 else if (TARGET_IEEEQUAD)
34868 fp |= 3 * 4;
34869 else
34870 fp |= 1 * 4;
34872 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
34874 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
34876 if (rs6000_passes_vector)
34877 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
34878 (TARGET_ALTIVEC_ABI ? 2
34879 : TARGET_SPE_ABI ? 3
34880 : 1));
34881 if (rs6000_returns_struct)
34882 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
34883 aix_struct_return ? 2 : 1);
34885 #endif
34886 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
34887 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
34888 file_end_indicate_exec_stack ();
34889 #endif
34891 if (flag_split_stack)
34892 file_end_indicate_split_stack ();
34894 if (cpu_builtin_p)
34896 /* We have expanded a CPU builtin, so we need to emit a reference to
34897 the special symbol that LIBC uses to declare it supports the
34898 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
34899 switch_to_section (data_section);
34900 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
34901 fprintf (asm_out_file, "\t%s %s\n",
34902 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
34905 #endif
34907 #if TARGET_XCOFF
34909 #ifndef HAVE_XCOFF_DWARF_EXTRAS
34910 #define HAVE_XCOFF_DWARF_EXTRAS 0
34911 #endif
34913 static enum unwind_info_type
34914 rs6000_xcoff_debug_unwind_info (void)
34916 return UI_NONE;
34919 static void
34920 rs6000_xcoff_asm_output_anchor (rtx symbol)
34922 char buffer[100];
34924 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
34925 SYMBOL_REF_BLOCK_OFFSET (symbol));
34926 fprintf (asm_out_file, "%s", SET_ASM_OP);
34927 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
34928 fprintf (asm_out_file, ",");
34929 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
34930 fprintf (asm_out_file, "\n");
34933 static void
34934 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
34936 fputs (GLOBAL_ASM_OP, stream);
34937 RS6000_OUTPUT_BASENAME (stream, name);
34938 putc ('\n', stream);
34941 /* A get_unnamed_section callback, used for read-only sections.  DIRECTIVE
34942 points to the section string variable. */
34944 static void
34945 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
34947 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
34948 *(const char *const *) directive,
34949 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34952 /* Likewise for read-write sections. */
34954 static void
34955 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
34957 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
34958 *(const char *const *) directive,
34959 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34962 static void
34963 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
34965 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
34966 *(const char *const *) directive,
34967 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34970 /* A get_unnamed_section callback, used for switching to toc_section. */
34972 static void
34973 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34975 if (TARGET_MINIMAL_TOC)
34977 /* toc_section is always selected at least once from
34978 rs6000_xcoff_file_start, so this is guaranteed to
34979 always be defined once and only once in each file. */
34980 if (!toc_initialized)
34982 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
34983 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
34984 toc_initialized = 1;
34986 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
34987 (TARGET_32BIT ? "" : ",3"));
34989 else
34990 fputs ("\t.toc\n", asm_out_file);
34993 /* Implement TARGET_ASM_INIT_SECTIONS. */
34995 static void
34996 rs6000_xcoff_asm_init_sections (void)
34998 read_only_data_section
34999 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35000 &xcoff_read_only_section_name);
35002 private_data_section
35003 = get_unnamed_section (SECTION_WRITE,
35004 rs6000_xcoff_output_readwrite_section_asm_op,
35005 &xcoff_private_data_section_name);
35007 tls_data_section
35008 = get_unnamed_section (SECTION_TLS,
35009 rs6000_xcoff_output_tls_section_asm_op,
35010 &xcoff_tls_data_section_name);
35012 tls_private_data_section
35013 = get_unnamed_section (SECTION_TLS,
35014 rs6000_xcoff_output_tls_section_asm_op,
35015 &xcoff_private_data_section_name);
35017 read_only_private_data_section
35018 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35019 &xcoff_private_data_section_name);
35021 toc_section
35022 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
35024 readonly_data_section = read_only_data_section;
35027 static int
35028 rs6000_xcoff_reloc_rw_mask (void)
35030 return 3;
35033 static void
35034 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
35035 tree decl ATTRIBUTE_UNUSED)
35037 int smclass;
35038 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
35040 if (flags & SECTION_EXCLUDE)
35041 smclass = 4;
35042 else if (flags & SECTION_DEBUG)
35044 fprintf (asm_out_file, "\t.dwsect %s\n", name);
35045 return;
35047 else if (flags & SECTION_CODE)
35048 smclass = 0;
35049 else if (flags & SECTION_TLS)
35050 smclass = 3;
35051 else if (flags & SECTION_WRITE)
35052 smclass = 2;
35053 else
35054 smclass = 1;
35056 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
35057 (flags & SECTION_CODE) ? "." : "",
35058 name, suffix[smclass], flags & SECTION_ENTSIZE);
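/* A minimal sketch (not from the original file) of the storage-mapping
   class selection above, minus the SECTION_DEBUG early return: each
   combination of section flags maps to one XCOFF csect class suffix.  */
#if 0
static const char *
example_smclass (unsigned int flags)
{
  if (flags & SECTION_EXCLUDE)
    return "XO";
  if (flags & SECTION_CODE)
    return "PR";
  if (flags & SECTION_TLS)
    return "TL";
  if (flags & SECTION_WRITE)
    return "RW";
  return "RO";
}
#endif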
35061 #define IN_NAMED_SECTION(DECL) \
35062 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
35063 && DECL_SECTION_NAME (DECL) != NULL)
35065 static section *
35066 rs6000_xcoff_select_section (tree decl, int reloc,
35067 unsigned HOST_WIDE_INT align)
35069 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
35070 named section. */
35071 if (align > BIGGEST_ALIGNMENT)
35073 resolve_unique_section (decl, reloc, true);
35074 if (IN_NAMED_SECTION (decl))
35075 return get_named_section (decl, NULL, reloc);
35078 if (decl_readonly_section (decl, reloc))
35080 if (TREE_PUBLIC (decl))
35081 return read_only_data_section;
35082 else
35083 return read_only_private_data_section;
35085 else
35087 #if HAVE_AS_TLS
35088 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35090 if (TREE_PUBLIC (decl))
35091 return tls_data_section;
35092 else if (bss_initializer_p (decl))
35094 /* Convert to COMMON to emit in BSS. */
35095 DECL_COMMON (decl) = 1;
35096 return tls_comm_section;
35098 else
35099 return tls_private_data_section;
35101 else
35102 #endif
35103 if (TREE_PUBLIC (decl))
35104 return data_section;
35105 else
35106 return private_data_section;
35110 static void
35111 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
35113 const char *name;
35115 /* Use select_section for private data and uninitialized data with
35116 alignment <= BIGGEST_ALIGNMENT. */
35117 if (!TREE_PUBLIC (decl)
35118 || DECL_COMMON (decl)
35119 || (DECL_INITIAL (decl) == NULL_TREE
35120 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
35121 || DECL_INITIAL (decl) == error_mark_node
35122 || (flag_zero_initialized_in_bss
35123 && initializer_zerop (DECL_INITIAL (decl))))
35124 return;
35126 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35127 name = (*targetm.strip_name_encoding) (name);
35128 set_decl_section_name (decl, name);
35131 /* Select section for constant in constant pool.
35133 On RS/6000, all constants are in the private read-only data area.
35134 However, if this is being placed in the TOC it must be output as a
35135 toc entry. */
35137 static section *
35138 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
35139 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
35141 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35142 return toc_section;
35143 else
35144 return read_only_private_data_section;
35147 /* Remove any trailing [DS] or the like from the symbol name. */
35149 static const char *
35150 rs6000_xcoff_strip_name_encoding (const char *name)
35152 size_t len;
35153 if (*name == '*')
35154 name++;
35155 len = strlen (name);
35156 if (name[len - 1] == ']')
35157 return ggc_alloc_string (name, len - 4);
35158 else
35159 return name;
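/* A minimal sketch of the stripping above: a trailing XCOFF mapping
   class is always four characters ("[DS]", "[RW]", ...), so the code
   simply drops the last four characters when the name ends in ']'.
   For example, "*foo[DS]" becomes "foo" and "bar" is returned as-is.  */
#if 0
#include <stdio.h>
#include <string.h>
static void
example_strip (const char *name, char *out, size_t outsz)
{
  if (*name == '*')
    name++;   /* skip the user-label marker */
  size_t len = strlen (name);
  if (len > 4 && name[len - 1] == ']')
    len -= 4;   /* drop "[XX]" */
  snprintf (out, outsz, "%.*s", (int) len, name);
}
#endif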
35162 /* Section attributes. AIX is always PIC. */
35164 static unsigned int
35165 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
35167 unsigned int align;
35168 unsigned int flags = default_section_type_flags (decl, name, reloc);
35170 /* Align to at least UNIT size. */
35171 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
35172 align = MIN_UNITS_PER_WORD;
35173 else
35174 /* Increase alignment of large objects if not already stricter. */
35175 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
35176 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
35177 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
35179 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
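/* A minimal sketch of the encoding above, assuming SECTION_ENTSIZE
   masks the low-order entsize bits as in GCC's output.h: the byte
   alignment is a power of two, so its log2 fits in the entsize field.
   E.g. an 8-byte alignment is stored as 3 and can be recovered with
   (1U << (flags & SECTION_ENTSIZE)).  */
#if 0
static unsigned int
example_decode_align (unsigned int flags)
{
  return 1U << (flags & SECTION_ENTSIZE);
}
#endif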
35182 /* Output at beginning of assembler file.
35184 Initialize the section names for the RS/6000 at this point.
35186 Specify filename, including full path, to assembler.
35188 We want to go into the TOC section so at least one .toc will be emitted.
35189 Also, in order to output proper .bs/.es pairs, we need at least one static
35190 [RW] section emitted.
35192 Finally, declare mcount when profiling to make the assembler happy. */
35194 static void
35195 rs6000_xcoff_file_start (void)
35197 rs6000_gen_section_name (&xcoff_bss_section_name,
35198 main_input_filename, ".bss_");
35199 rs6000_gen_section_name (&xcoff_private_data_section_name,
35200 main_input_filename, ".rw_");
35201 rs6000_gen_section_name (&xcoff_read_only_section_name,
35202 main_input_filename, ".ro_");
35203 rs6000_gen_section_name (&xcoff_tls_data_section_name,
35204 main_input_filename, ".tls_");
35205 rs6000_gen_section_name (&xcoff_tbss_section_name,
35206 main_input_filename, ".tbss_[UL]");
35208 fputs ("\t.file\t", asm_out_file);
35209 output_quoted_string (asm_out_file, main_input_filename);
35210 fputc ('\n', asm_out_file);
35211 if (write_symbols != NO_DEBUG)
35212 switch_to_section (private_data_section);
35213 switch_to_section (toc_section);
35214 switch_to_section (text_section);
35215 if (profile_flag)
35216 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
35217 rs6000_file_start ();
35220 /* Output at end of assembler file.
35221 On the RS/6000, referencing data should automatically pull in text. */
35223 static void
35224 rs6000_xcoff_file_end (void)
35226 switch_to_section (text_section);
35227 fputs ("_section_.text:\n", asm_out_file);
35228 switch_to_section (data_section);
35229 fputs (TARGET_32BIT
35230 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
35231 asm_out_file);
35234 struct declare_alias_data
35236 FILE *file;
35237 bool function_descriptor;
35240 /* Declare alias N.  A helper function for call_for_symbol_and_aliases. */
35242 static bool
35243 rs6000_declare_alias (struct symtab_node *n, void *d)
35245 struct declare_alias_data *data = (struct declare_alias_data *)d;
35246 /* Main symbol is output specially, because varasm machinery does part of
35247 the job for us - we do not need to declare .globl/lglobs and such. */
35248 if (!n->alias || n->weakref)
35249 return false;
35251 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
35252 return false;
35254 /* Prevent assemble_alias from trying to use the .set pseudo-op,
35255 which does not behave as expected by the middle-end. */
35256 TREE_ASM_WRITTEN (n->decl) = true;
35258 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
35259 char *buffer = (char *) alloca (strlen (name) + 2);
35260 char *p;
35261 int dollar_inside = 0;
35263 strcpy (buffer, name);
35264 p = strchr (buffer, '$');
35265 while (p) {
35266 *p = '_';
35267 dollar_inside++;
35268 p = strchr (p + 1, '$');
35270 if (TREE_PUBLIC (n->decl))
35272 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
35274 if (dollar_inside) {
35275 if (data->function_descriptor)
35276 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35277 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35279 if (data->function_descriptor)
35281 fputs ("\t.globl .", data->file);
35282 RS6000_OUTPUT_BASENAME (data->file, buffer);
35283 putc ('\n', data->file);
35285 fputs ("\t.globl ", data->file);
35286 RS6000_OUTPUT_BASENAME (data->file, buffer);
35287 putc ('\n', data->file);
35289 #ifdef ASM_WEAKEN_DECL
35290 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
35291 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
35292 #endif
35294 else
35296 if (dollar_inside)
35298 if (data->function_descriptor)
35299 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35300 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35302 if (data->function_descriptor)
35304 fputs ("\t.lglobl .", data->file);
35305 RS6000_OUTPUT_BASENAME (data->file, buffer);
35306 putc ('\n', data->file);
35308 fputs ("\t.lglobl ", data->file);
35309 RS6000_OUTPUT_BASENAME (data->file, buffer);
35310 putc ('\n', data->file);
35312 if (data->function_descriptor)
35313 fputs (".", data->file);
35314 RS6000_OUTPUT_BASENAME (data->file, buffer);
35315 fputs (":\n", data->file);
35316 return false;
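/* A minimal sketch of the dollar-sign handling above: the AIX
   assembler rejects '$' in symbol names, so every '$' is rewritten to
   '_' and a .rename directive maps the mangled name back to the
   original spelling (e.g. "a$b" is emitted as a_b together with
   .rename a_b,"a$b").  */
#if 0
#include <string.h>
static void
example_sanitize_dollars (char *buffer)
{
  for (char *p = strchr (buffer, '$'); p; p = strchr (p + 1, '$'))
    *p = '_';
}
#endif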
35319 /* This macro produces the initial definition of a function name.
35320 On the RS/6000, we need to place an extra '.' in the function name and
35321 output the function descriptor.
35322 Dollar signs are converted to underscores.
35324 The csect for the function will have already been created when
35325 text_section was selected. We do have to go back to that csect, however.
35327 The third and fourth parameters to the .function pseudo-op (16 and 044)
35328 are placeholders which no longer have any use.
35330 Because AIX assembler's .set command has unexpected semantics, we output
35331 all aliases as alternative labels in front of the definition. */
35333 void
35334 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
35336 char *buffer = (char *) alloca (strlen (name) + 1);
35337 char *p;
35338 int dollar_inside = 0;
35339 struct declare_alias_data data = {file, false};
35341 strcpy (buffer, name);
35342 p = strchr (buffer, '$');
35343 while (p) {
35344 *p = '_';
35345 dollar_inside++;
35346 p = strchr (p + 1, '$');
35348 if (TREE_PUBLIC (decl))
35350 if (!RS6000_WEAK || !DECL_WEAK (decl))
35352 if (dollar_inside) {
35353 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35354 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35356 fputs ("\t.globl .", file);
35357 RS6000_OUTPUT_BASENAME (file, buffer);
35358 putc ('\n', file);
35361 else
35363 if (dollar_inside) {
35364 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35365 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35367 fputs ("\t.lglobl .", file);
35368 RS6000_OUTPUT_BASENAME (file, buffer);
35369 putc ('\n', file);
35371 fputs ("\t.csect ", file);
35372 RS6000_OUTPUT_BASENAME (file, buffer);
35373 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
35374 RS6000_OUTPUT_BASENAME (file, buffer);
35375 fputs (":\n", file);
35376 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35377 &data, true);
35378 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
35379 RS6000_OUTPUT_BASENAME (file, buffer);
35380 fputs (", TOC[tc0], 0\n", file);
35381 in_section = NULL;
35382 switch_to_section (function_section (decl));
35383 putc ('.', file);
35384 RS6000_OUTPUT_BASENAME (file, buffer);
35385 fputs (":\n", file);
35386 data.function_descriptor = true;
35387 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35388 &data, true);
35389 if (!DECL_IGNORED_P (decl))
35391 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35392 xcoffout_declare_function (file, decl, buffer);
35393 else if (write_symbols == DWARF2_DEBUG)
35395 name = (*targetm.strip_name_encoding) (name);
35396 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
35399 return;
35402 /* This macro produces the initial definition of an object (variable) name.
35403 Because AIX assembler's .set command has unexpected semantics, we output
35404 all aliases as alternative labels in front of the definition. */
35406 void
35407 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
35409 struct declare_alias_data data = {file, false};
35410 RS6000_OUTPUT_BASENAME (file, name);
35411 fputs (":\n", file);
35412 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35413 &data, true);
35416 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
35418 void
35419 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
35421 fputs (integer_asm_op (size, FALSE), file);
35422 assemble_name (file, label);
35423 fputs ("-$", file);
35426 /* Output a symbol offset relative to the dbase for the current object.
35427 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
35428 signed offsets.
35430 __gcc_unwind_dbase is embedded in all executables/libraries through
35431 libgcc/config/rs6000/crtdbase.S. */
35433 void
35434 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
35436 fputs (integer_asm_op (size, FALSE), file);
35437 assemble_name (file, label);
35438 fputs("-__gcc_unwind_dbase", file);
35441 #ifdef HAVE_AS_TLS
35442 static void
35443 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
35445 rtx symbol;
35446 int flags;
35447 const char *symname;
35449 default_encode_section_info (decl, rtl, first);
35451 /* Careful not to prod global register variables. */
35452 if (!MEM_P (rtl))
35453 return;
35454 symbol = XEXP (rtl, 0);
35455 if (GET_CODE (symbol) != SYMBOL_REF)
35456 return;
35458 flags = SYMBOL_REF_FLAGS (symbol);
35460 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35461 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
35463 SYMBOL_REF_FLAGS (symbol) = flags;
35465 /* Append mapping class to extern decls. */
35466 symname = XSTR (symbol, 0);
35467 if (decl /* sync condition with assemble_external () */
35468 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
35469 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
35470 || TREE_CODE (decl) == FUNCTION_DECL)
35471 && symname[strlen (symname) - 1] != ']')
35473 char *newname = (char *) alloca (strlen (symname) + 5);
35474 strcpy (newname, symname);
35475 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
35476 ? "[DS]" : "[UA]"));
35477 XSTR (symbol, 0) = ggc_strdup (newname);
35480 #endif /* HAVE_AS_TLS */
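/* A minimal sketch (hypothetical helper, not from the original file)
   of the suffixing performed above: extern function declarations gain
   the "[DS]" descriptor mapping class and extern data declarations
   gain "[UA]", unless the name already ends in a mapping class.  */
#if 0
#include <stdio.h>
static void
example_append_mapping_class (char *buf, size_t bufsz,
                              const char *name, int is_function)
{
  snprintf (buf, bufsz, "%s%s", name, is_function ? "[DS]" : "[UA]");
}
#endif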
35481 #endif /* TARGET_XCOFF */
35483 /* Return true if INSN should not be copied. */
35485 static bool
35486 rs6000_cannot_copy_insn_p (rtx_insn *insn)
35488 return recog_memoized (insn) >= 0
35489 && get_attr_cannot_copy (insn);
35492 /* Compute a (partial) cost for rtx X. Return true if the complete
35493 cost has been computed, and false if subexpressions should be
35494 scanned. In either case, *TOTAL contains the cost result. */
35496 static bool
35497 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
35498 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
35500 int code = GET_CODE (x);
35502 switch (code)
35504 /* On the RS/6000, if it is valid in the insn, it is free. */
35505 case CONST_INT:
35506 if (((outer_code == SET
35507 || outer_code == PLUS
35508 || outer_code == MINUS)
35509 && (satisfies_constraint_I (x)
35510 || satisfies_constraint_L (x)))
35511 || (outer_code == AND
35512 && (satisfies_constraint_K (x)
35513 || (mode == SImode
35514 ? satisfies_constraint_L (x)
35515 : satisfies_constraint_J (x))))
35516 || ((outer_code == IOR || outer_code == XOR)
35517 && (satisfies_constraint_K (x)
35518 || (mode == SImode
35519 ? satisfies_constraint_L (x)
35520 : satisfies_constraint_J (x))))
35521 || outer_code == ASHIFT
35522 || outer_code == ASHIFTRT
35523 || outer_code == LSHIFTRT
35524 || outer_code == ROTATE
35525 || outer_code == ROTATERT
35526 || outer_code == ZERO_EXTRACT
35527 || (outer_code == MULT
35528 && satisfies_constraint_I (x))
35529 || ((outer_code == DIV || outer_code == UDIV
35530 || outer_code == MOD || outer_code == UMOD)
35531 && exact_log2 (INTVAL (x)) >= 0)
35532 || (outer_code == COMPARE
35533 && (satisfies_constraint_I (x)
35534 || satisfies_constraint_K (x)))
35535 || ((outer_code == EQ || outer_code == NE)
35536 && (satisfies_constraint_I (x)
35537 || satisfies_constraint_K (x)
35538 || (mode == SImode
35539 ? satisfies_constraint_L (x)
35540 : satisfies_constraint_J (x))))
35541 || (outer_code == GTU
35542 && satisfies_constraint_I (x))
35543 || (outer_code == LTU
35544 && satisfies_constraint_P (x)))
35546 *total = 0;
35547 return true;
35549 else if ((outer_code == PLUS
35550 && reg_or_add_cint_operand (x, VOIDmode))
35551 || (outer_code == MINUS
35552 && reg_or_sub_cint_operand (x, VOIDmode))
35553 || ((outer_code == SET
35554 || outer_code == IOR
35555 || outer_code == XOR)
35556 && (INTVAL (x)
35557 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
35559 *total = COSTS_N_INSNS (1);
35560 return true;
35562 /* FALLTHRU */
35564 case CONST_DOUBLE:
35565 case CONST_WIDE_INT:
35566 case CONST:
35567 case HIGH:
35568 case SYMBOL_REF:
35569 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35570 return true;
35572 case MEM:
35573 /* When optimizing for size, MEM should be slightly more expensive
35574 than generating the address, e.g., (plus (reg) (const)).
35575 L1 cache latency is about two instructions. */
35576 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35577 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
35578 *total += COSTS_N_INSNS (100);
35579 return true;
35581 case LABEL_REF:
35582 *total = 0;
35583 return true;
35585 case PLUS:
35586 case MINUS:
35587 if (FLOAT_MODE_P (mode))
35588 *total = rs6000_cost->fp;
35589 else
35590 *total = COSTS_N_INSNS (1);
35591 return false;
35593 case MULT:
35594 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35595 && satisfies_constraint_I (XEXP (x, 1)))
35597 if (INTVAL (XEXP (x, 1)) >= -256
35598 && INTVAL (XEXP (x, 1)) <= 255)
35599 *total = rs6000_cost->mulsi_const9;
35600 else
35601 *total = rs6000_cost->mulsi_const;
35603 else if (mode == SFmode)
35604 *total = rs6000_cost->fp;
35605 else if (FLOAT_MODE_P (mode))
35606 *total = rs6000_cost->dmul;
35607 else if (mode == DImode)
35608 *total = rs6000_cost->muldi;
35609 else
35610 *total = rs6000_cost->mulsi;
35611 return false;
35613 case FMA:
35614 if (mode == SFmode)
35615 *total = rs6000_cost->fp;
35616 else
35617 *total = rs6000_cost->dmul;
35618 break;
35620 case DIV:
35621 case MOD:
35622 if (FLOAT_MODE_P (mode))
35624 *total = mode == DFmode ? rs6000_cost->ddiv
35625 : rs6000_cost->sdiv;
35626 return false;
35628 /* FALLTHRU */
35630 case UDIV:
35631 case UMOD:
35632 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35633 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
35635 if (code == DIV || code == MOD)
35636 /* Shift, addze */
35637 *total = COSTS_N_INSNS (2);
35638 else
35639 /* Shift */
35640 *total = COSTS_N_INSNS (1);
35642 else
35644 if (GET_MODE (XEXP (x, 1)) == DImode)
35645 *total = rs6000_cost->divdi;
35646 else
35647 *total = rs6000_cost->divsi;
35649 /* Add in shift and subtract for MOD unless we have a mod instruction. */
35650 if (!TARGET_MODULO && (code == MOD || code == UMOD))
35651 *total += COSTS_N_INSNS (2);
35652 return false;
35654 case CTZ:
35655 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
35656 return false;
35658 case FFS:
35659 *total = COSTS_N_INSNS (4);
35660 return false;
35662 case POPCOUNT:
35663 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
35664 return false;
35666 case PARITY:
35667 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
35668 return false;
35670 case NOT:
35671 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
35672 *total = 0;
35673 else
35674 *total = COSTS_N_INSNS (1);
35675 return false;
35677 case AND:
35678 if (CONST_INT_P (XEXP (x, 1)))
35680 rtx left = XEXP (x, 0);
35681 rtx_code left_code = GET_CODE (left);
35683 /* rotate-and-mask: 1 insn. */
35684 if ((left_code == ROTATE
35685 || left_code == ASHIFT
35686 || left_code == LSHIFTRT)
35687 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
35689 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
35690 if (!CONST_INT_P (XEXP (left, 1)))
35691 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
35692 *total += COSTS_N_INSNS (1);
35693 return true;
35696 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
35697 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
35698 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
35699 || (val & 0xffff) == val
35700 || (val & 0xffff0000) == val
35701 || ((val & 0xffff) == 0 && mode == SImode))
35703 *total = rtx_cost (left, mode, AND, 0, speed);
35704 *total += COSTS_N_INSNS (1);
35705 return true;
35708 /* 2 insns. */
35709 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
35711 *total = rtx_cost (left, mode, AND, 0, speed);
35712 *total += COSTS_N_INSNS (2);
35713 return true;
35717 *total = COSTS_N_INSNS (1);
35718 return false;
35720 case IOR:
35721 /* FIXME */
35722 *total = COSTS_N_INSNS (1);
35723 return true;
35725 case CLZ:
35726 case XOR:
35727 case ZERO_EXTRACT:
35728 *total = COSTS_N_INSNS (1);
35729 return false;
35731 case ASHIFT:
35732 /* The EXTSWSLI instruction combines a sign extend and a shift.  Don't
35733 count the sign extend and the shift separately within the insn. */
35734 if (TARGET_EXTSWSLI && mode == DImode
35735 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
35736 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
35738 *total = 0;
35739 return false;
35741 /* fall through */
35743 case ASHIFTRT:
35744 case LSHIFTRT:
35745 case ROTATE:
35746 case ROTATERT:
35747 /* Handle mul_highpart. */
35748 if (outer_code == TRUNCATE
35749 && GET_CODE (XEXP (x, 0)) == MULT)
35751 if (mode == DImode)
35752 *total = rs6000_cost->muldi;
35753 else
35754 *total = rs6000_cost->mulsi;
35755 return true;
35757 else if (outer_code == AND)
35758 *total = 0;
35759 else
35760 *total = COSTS_N_INSNS (1);
35761 return false;
35763 case SIGN_EXTEND:
35764 case ZERO_EXTEND:
35765 if (GET_CODE (XEXP (x, 0)) == MEM)
35766 *total = 0;
35767 else
35768 *total = COSTS_N_INSNS (1);
35769 return false;
35771 case COMPARE:
35772 case NEG:
35773 case ABS:
35774 if (!FLOAT_MODE_P (mode))
35776 *total = COSTS_N_INSNS (1);
35777 return false;
35779 /* FALLTHRU */
35781 case FLOAT:
35782 case UNSIGNED_FLOAT:
35783 case FIX:
35784 case UNSIGNED_FIX:
35785 case FLOAT_TRUNCATE:
35786 *total = rs6000_cost->fp;
35787 return false;
35789 case FLOAT_EXTEND:
35790 if (mode == DFmode)
35791 *total = rs6000_cost->sfdf_convert;
35792 else
35793 *total = rs6000_cost->fp;
35794 return false;
35796 case UNSPEC:
35797 switch (XINT (x, 1))
35799 case UNSPEC_FRSP:
35800 *total = rs6000_cost->fp;
35801 return true;
35803 default:
35804 break;
35806 break;
35808 case CALL:
35809 case IF_THEN_ELSE:
35810 if (!speed)
35812 *total = COSTS_N_INSNS (1);
35813 return true;
35815 else if (FLOAT_MODE_P (mode)
35816 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
35818 *total = rs6000_cost->fp;
35819 return false;
35821 break;
35823 case NE:
35824 case EQ:
35825 case GTU:
35826 case LTU:
35827 /* Carry bit requires mode == Pmode.
35828 NEG or PLUS already counted so only add one. */
35829 if (mode == Pmode
35830 && (outer_code == NEG || outer_code == PLUS))
35832 *total = COSTS_N_INSNS (1);
35833 return true;
35835 if (outer_code == SET)
35837 if (XEXP (x, 1) == const0_rtx)
35839 if (TARGET_ISEL && !TARGET_MFCRF)
35840 *total = COSTS_N_INSNS (8);
35841 else
35842 *total = COSTS_N_INSNS (2);
35843 return true;
35845 else
35847 *total = COSTS_N_INSNS (3);
35848 return false;
35851 /* FALLTHRU */
35853 case GT:
35854 case LT:
35855 case UNORDERED:
35856 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
35858 if (TARGET_ISEL && !TARGET_MFCRF)
35859 *total = COSTS_N_INSNS (8);
35860 else
35861 *total = COSTS_N_INSNS (2);
35862 return true;
35864 /* CC COMPARE. */
35865 if (outer_code == COMPARE)
35867 *total = 0;
35868 return true;
35870 break;
35872 default:
35873 break;
35876 return false;
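/* A usage sketch: costs here are in COSTS_N_INSNS units, so a result
   of COSTS_N_INSNS (2) means "as expensive as two simple insns".
   Callers normally go through the rtx_cost wrapper rather than this
   target hook directly; something like the following, assuming the
   rtx_cost interface of this GCC version:  */
#if 0
static bool
example_mem_is_cheap (rtx x, machine_mode mode, bool speed)
{
  return rtx_cost (x, mode, SET, 0, speed) <= COSTS_N_INSNS (1);
}
#endif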
35879 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
35881 static bool
35882 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
35883 int opno, int *total, bool speed)
35885 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
35887 fprintf (stderr,
35888 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
35889 "opno = %d, total = %d, speed = %s, x:\n",
35890 ret ? "complete" : "scan inner",
35891 GET_MODE_NAME (mode),
35892 GET_RTX_NAME (outer_code),
35893 opno,
35894 *total,
35895 speed ? "true" : "false");
35897 debug_rtx (x);
35899 return ret;
35902 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35904 static int
35905 rs6000_debug_address_cost (rtx x, machine_mode mode,
35906 addr_space_t as, bool speed)
35908 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35910 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35911 ret, speed ? "true" : "false");
35912 debug_rtx (x);
35914 return ret;
35918 /* A C expression returning the cost of moving data from a register of class
35919 CLASS1 to one of CLASS2. */
35921 static int
35922 rs6000_register_move_cost (machine_mode mode,
35923 reg_class_t from, reg_class_t to)
35925 int ret;
35927 if (TARGET_DEBUG_COST)
35928 dbg_cost_ctrl++;
35930 /* Moves from/to GENERAL_REGS. */
35931 if (reg_classes_intersect_p (to, GENERAL_REGS)
35932 || reg_classes_intersect_p (from, GENERAL_REGS))
35934 reg_class_t rclass = from;
35936 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35937 rclass = to;
35939 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35940 ret = (rs6000_memory_move_cost (mode, rclass, false)
35941 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35943 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35944 shift. */
35945 else if (rclass == CR_REGS)
35946 ret = 4;
35948 /* For those processors that have slow LR/CTR moves, make them more
35949 expensive than memory in order to bias spills to memory. */
35950 else if ((rs6000_cpu == PROCESSOR_POWER6
35951 || rs6000_cpu == PROCESSOR_POWER7
35952 || rs6000_cpu == PROCESSOR_POWER8
35953 || rs6000_cpu == PROCESSOR_POWER9)
35954 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35955 ret = 6 * hard_regno_nregs[0][mode];
35957 else
35958 /* A move will cost one instruction per GPR moved. */
35959 ret = 2 * hard_regno_nregs[0][mode];
35962 /* If we have VSX, we can easily move between FPR and Altivec registers. */
35963 else if (VECTOR_MEM_VSX_P (mode)
35964 && reg_classes_intersect_p (to, VSX_REGS)
35965 && reg_classes_intersect_p (from, VSX_REGS))
35966 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
35968 /* Moving between two similar registers is just one instruction. */
35969 else if (reg_classes_intersect_p (to, from))
35970 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35972 /* Everything else has to go through GENERAL_REGS. */
35973 else
35974 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35975 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35977 if (TARGET_DEBUG_COST)
35979 if (dbg_cost_ctrl == 1)
35980 fprintf (stderr,
35981 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35982 ret, GET_MODE_NAME (mode), reg_class_names[from],
35983 reg_class_names[to]);
35984 dbg_cost_ctrl--;
35987 return ret;
35990 /* A C expression returning the cost of moving data of MODE from a register to
35991 or from memory. */
35993 static int
35994 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
35995 bool in ATTRIBUTE_UNUSED)
35997 int ret;
35999 if (TARGET_DEBUG_COST)
36000 dbg_cost_ctrl++;
36002 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
36003 ret = 4 * hard_regno_nregs[0][mode];
36004 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
36005 || reg_classes_intersect_p (rclass, VSX_REGS)))
36006 ret = 4 * hard_regno_nregs[32][mode];
36007 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
36008 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
36009 else
36010 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
36012 if (TARGET_DEBUG_COST)
36014 if (dbg_cost_ctrl == 1)
36015 fprintf (stderr,
36016 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
36017 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
36018 dbg_cost_ctrl--;
36021 return ret;
36024 /* Returns a code for a target-specific builtin that implements the
36025 reciprocal of the function, or NULL_TREE if not available. */
36027 static tree
36028 rs6000_builtin_reciprocal (tree fndecl)
36030 switch (DECL_FUNCTION_CODE (fndecl))
36032 case VSX_BUILTIN_XVSQRTDP:
36033 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
36034 return NULL_TREE;
36036 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
36038 case VSX_BUILTIN_XVSQRTSP:
36039 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
36040 return NULL_TREE;
36042 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
36044 default:
36045 return NULL_TREE;
36049 /* Load up a constant. If the mode is a vector mode, splat the value across
36050 all of the vector elements. */
36052 static rtx
36053 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
36055 rtx reg;
36057 if (mode == SFmode || mode == DFmode)
36059 rtx d = const_double_from_real_value (dconst, mode);
36060 reg = force_reg (mode, d);
36062 else if (mode == V4SFmode)
36064 rtx d = const_double_from_real_value (dconst, SFmode);
36065 rtvec v = gen_rtvec (4, d, d, d, d);
36066 reg = gen_reg_rtx (mode);
36067 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36069 else if (mode == V2DFmode)
36071 rtx d = const_double_from_real_value (dconst, DFmode);
36072 rtvec v = gen_rtvec (2, d, d);
36073 reg = gen_reg_rtx (mode);
36074 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36076 else
36077 gcc_unreachable ();
36079 return reg;
36082 /* Generate an FMA instruction. */
36084 static void
36085 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
36087 machine_mode mode = GET_MODE (target);
36088 rtx dst;
36090 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
36091 gcc_assert (dst != NULL);
36093 if (dst != target)
36094 emit_move_insn (target, dst);
36097 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
36099 static void
36100 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
36102 machine_mode mode = GET_MODE (dst);
36103 rtx r;
36105 /* This is a tad more complicated, since the fnma_optab is for
36106 a different expression: fma(-m1, m2, a), which is the same
36107 thing except in the case of signed zeros.
36109 Fortunately we know that if FMA is supported that FNMSUB is
36110 also supported in the ISA. Just expand it directly. */
36112 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
36114 r = gen_rtx_NEG (mode, a);
36115 r = gen_rtx_FMA (mode, m1, m2, r);
36116 r = gen_rtx_NEG (mode, r);
36117 emit_insn (gen_rtx_SET (dst, r));
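/* A minimal sketch of the identity used above, in scalar C with the
   fma() from <math.h>: -fma (m1, m2, -a) computes a - m1*m2 with a
   single rounding, which is FNMSUB.  It differs from fma (-m1, m2, a)
   only in the sign of a zero result (e.g. m1 = 1.0, m2 = 0.0, a = 0.0
   yields -0.0 one way and +0.0 the other).  */
#if 0
#include <math.h>
static double
example_nmsub (double m1, double m2, double a)
{
  return -fma (m1, m2, -a);   /* a - m1*m2, rounded once */
}
#endif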
36120 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
36121 add a reg_note saying that this was a division. Support both scalar and
36122 vector divide. Assumes no trapping math and finite arguments. */
36124 void
36125 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
36127 machine_mode mode = GET_MODE (dst);
36128 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
36129 int i;
36131 /* Low precision estimates guarantee 5 bits of accuracy. High
36132 precision estimates guarantee 14 bits of accuracy. SFmode
36133 requires 23 bits of accuracy. DFmode requires 52 bits of
36134 accuracy. Each pass at least doubles the accuracy, leading
36135 to the following. */
36136 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36137 if (mode == DFmode || mode == V2DFmode)
36138 passes++;
36140 enum insn_code code = optab_handler (smul_optab, mode);
36141 insn_gen_fn gen_mul = GEN_FCN (code);
36143 gcc_assert (code != CODE_FOR_nothing);
36145 one = rs6000_load_constant_and_splat (mode, dconst1);
36147 /* x0 = 1./d estimate */
36148 x0 = gen_reg_rtx (mode);
36149 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
36150 UNSPEC_FRES)));
36152 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
36153 if (passes > 1) {
36155 /* e0 = 1. - d * x0 */
36156 e0 = gen_reg_rtx (mode);
36157 rs6000_emit_nmsub (e0, d, x0, one);
36159 /* x1 = x0 + e0 * x0 */
36160 x1 = gen_reg_rtx (mode);
36161 rs6000_emit_madd (x1, e0, x0, x0);
36163 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
36164 ++i, xprev = xnext, eprev = enext) {
36166 /* enext = eprev * eprev */
36167 enext = gen_reg_rtx (mode);
36168 emit_insn (gen_mul (enext, eprev, eprev));
36170 /* xnext = xprev + enext * xprev */
36171 xnext = gen_reg_rtx (mode);
36172 rs6000_emit_madd (xnext, enext, xprev, xprev);
36175 } else
36176 xprev = x0;
36178 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
36180 /* u = n * xprev */
36181 u = gen_reg_rtx (mode);
36182 emit_insn (gen_mul (u, n, xprev));
36184 /* v = n - (d * u) */
36185 v = gen_reg_rtx (mode);
36186 rs6000_emit_nmsub (v, d, u, n);
36188 /* dst = (v * xprev) + u */
36189 rs6000_emit_madd (dst, v, xprev, u);
36191 if (note_p)
36192 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
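/* A scalar sketch of the expansion above for double precision with
   one refinement pass, using fma() from <math.h>.  The 1.0/d below
   merely stands in for the hardware fre estimate; everything else
   mirrors the emitted sequence.  */
#if 0
#include <math.h>
static double
example_swdiv (double n, double d)
{
  double x = 1.0 / d;             /* x0 = estimate of 1/d (fre) */
  double e = -fma (d, x, -1.0);   /* e0 = 1 - d*x0 */
  x = fma (e, x, x);              /* x1 = x0 + e0*x0 */
  double u = n * x;               /* u = n*x1 */
  double v = -fma (d, u, -n);     /* v = n - d*u */
  return fma (v, x, u);           /* dst = u + v*x1 */
}
#endif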
36195 /* Goldschmidt's Algorithm for single/double-precision floating point
36196 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
36198 void
36199 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
36201 machine_mode mode = GET_MODE (src);
36202 rtx e = gen_reg_rtx (mode);
36203 rtx g = gen_reg_rtx (mode);
36204 rtx h = gen_reg_rtx (mode);
36206 /* Low precision estimates guarantee 5 bits of accuracy. High
36207 precision estimates guarantee 14 bits of accuracy. SFmode
36208 requires 23 bits of accuracy. DFmode requires 52 bits of
36209 accuracy. Each pass at least doubles the accuracy, leading
36210 to the following. */
36211 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36212 if (mode == DFmode || mode == V2DFmode)
36213 passes++;
36215 int i;
36216 rtx mhalf;
36217 enum insn_code code = optab_handler (smul_optab, mode);
36218 insn_gen_fn gen_mul = GEN_FCN (code);
36220 gcc_assert (code != CODE_FOR_nothing);
36222 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
36224 /* e = rsqrt estimate */
36225 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
36226 UNSPEC_RSQRT)));
36228 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
36229 if (!recip)
36231 rtx zero = force_reg (mode, CONST0_RTX (mode));
36233 if (mode == SFmode)
36235 rtx target = emit_conditional_move (e, GT, src, zero, mode,
36236 e, zero, mode, 0);
36237 if (target != e)
36238 emit_move_insn (e, target);
36240 else
36242 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
36243 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
36247 /* g = sqrt estimate. */
36248 emit_insn (gen_mul (g, e, src));
36249 /* h = 1/(2*sqrt) estimate. */
36250 emit_insn (gen_mul (h, e, mhalf));
36252 if (recip)
36254 if (passes == 1)
36256 rtx t = gen_reg_rtx (mode);
36257 rs6000_emit_nmsub (t, g, h, mhalf);
36258 /* Apply correction directly to 1/rsqrt estimate. */
36259 rs6000_emit_madd (dst, e, t, e);
36261 else
36263 for (i = 0; i < passes; i++)
36265 rtx t1 = gen_reg_rtx (mode);
36266 rtx g1 = gen_reg_rtx (mode);
36267 rtx h1 = gen_reg_rtx (mode);
36269 rs6000_emit_nmsub (t1, g, h, mhalf);
36270 rs6000_emit_madd (g1, g, t1, g);
36271 rs6000_emit_madd (h1, h, t1, h);
36273 g = g1;
36274 h = h1;
36276 /* Multiply by 2 for 1/rsqrt. */
36277 emit_insn (gen_add3_insn (dst, h, h));
36280 else
36282 rtx t = gen_reg_rtx (mode);
36283 rs6000_emit_nmsub (t, g, h, mhalf);
36284 rs6000_emit_madd (dst, g, t, g);
36287 return;
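/* A scalar sketch of one Goldschmidt step as emitted above, for
   double sqrt(x) with x > 0 (the zero filtering is omitted).  The
   1/sqrt below stands in for the hardware rsqrt estimate.  */
#if 0
#include <math.h>
static double
example_swsqrt (double x)
{
  double e = 1.0 / sqrt (x);      /* rsqrt estimate */
  double g = e * x;               /* g ~= sqrt(x) */
  double h = e * 0.5;             /* h ~= 1/(2*sqrt(x)) */
  double t = -fma (g, h, -0.5);   /* t = 0.5 - g*h (the residual) */
  g = fma (g, t, g);              /* refined sqrt estimate */
  h = fma (h, t, h);              /* refined 1/(2*sqrt) estimate */
  return g;                       /* for rsqrt, return h + h instead */
}
#endif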
36290 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
36291 (Power7) targets. DST is the target, and SRC is the argument operand. */
36293 void
36294 rs6000_emit_popcount (rtx dst, rtx src)
36296 machine_mode mode = GET_MODE (dst);
36297 rtx tmp1, tmp2;
36299 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
36300 if (TARGET_POPCNTD)
36302 if (mode == SImode)
36303 emit_insn (gen_popcntdsi2 (dst, src));
36304 else
36305 emit_insn (gen_popcntddi2 (dst, src));
36306 return;
36309 tmp1 = gen_reg_rtx (mode);
36311 if (mode == SImode)
36313 emit_insn (gen_popcntbsi2 (tmp1, src));
36314 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
36315 NULL_RTX, 0);
36316 tmp2 = force_reg (SImode, tmp2);
36317 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
36319 else
36321 emit_insn (gen_popcntbdi2 (tmp1, src));
36322 tmp2 = expand_mult (DImode, tmp1,
36323 GEN_INT ((HOST_WIDE_INT)
36324 0x01010101 << 32 | 0x01010101),
36325 NULL_RTX, 0);
36326 tmp2 = force_reg (DImode, tmp2);
36327 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
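/* A scalar sketch of the popcntb-based fallback above for 32 bits.
   The loop stands in for the popcntb instruction, which leaves the
   population count of each source byte in the corresponding result
   byte; the multiply by 0x01010101 then sums all byte counts into the
   most significant byte, which the shift extracts.  */
#if 0
#include <stdint.h>
static uint32_t
example_popcount32 (uint32_t x)
{
  uint32_t per_byte = 0;
  for (int i = 0; i < 4; i++)   /* software stand-in for popcntb */
    {
      uint32_t b = (x >> (8 * i)) & 0xff;
      b = b - ((b >> 1) & 0x55);
      b = (b & 0x33) + ((b >> 2) & 0x33);
      b = (b + (b >> 4)) & 0x0f;
      per_byte |= b << (8 * i);
    }
  return (per_byte * 0x01010101u) >> 24;
}
#endif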
36332 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
36333 target, and SRC is the argument operand. */
36335 void
36336 rs6000_emit_parity (rtx dst, rtx src)
36338 machine_mode mode = GET_MODE (dst);
36339 rtx tmp;
36341 tmp = gen_reg_rtx (mode);
36343 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
36344 if (TARGET_CMPB)
36346 if (mode == SImode)
36348 emit_insn (gen_popcntbsi2 (tmp, src));
36349 emit_insn (gen_paritysi2_cmpb (dst, tmp));
36351 else
36353 emit_insn (gen_popcntbdi2 (tmp, src));
36354 emit_insn (gen_paritydi2_cmpb (dst, tmp));
36356 return;
36359 if (mode == SImode)
36361 /* Is mult+shift >= shift+xor+shift+xor? */
36362 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
36364 rtx tmp1, tmp2, tmp3, tmp4;
36366 tmp1 = gen_reg_rtx (SImode);
36367 emit_insn (gen_popcntbsi2 (tmp1, src));
36369 tmp2 = gen_reg_rtx (SImode);
36370 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
36371 tmp3 = gen_reg_rtx (SImode);
36372 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
36374 tmp4 = gen_reg_rtx (SImode);
36375 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
36376 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
36378 else
36379 rs6000_emit_popcount (tmp, src);
36380 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
36382 else
36384 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
36385 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
36387 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
36389 tmp1 = gen_reg_rtx (DImode);
36390 emit_insn (gen_popcntbdi2 (tmp1, src));
36392 tmp2 = gen_reg_rtx (DImode);
36393 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
36394 tmp3 = gen_reg_rtx (DImode);
36395 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
36397 tmp4 = gen_reg_rtx (DImode);
36398 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
36399 tmp5 = gen_reg_rtx (DImode);
36400 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
36402 tmp6 = gen_reg_rtx (DImode);
36403 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
36404 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
36406 else
36407 rs6000_emit_popcount (tmp, src);
36408 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
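/* A scalar sketch of the shift/xor fallback above: given the per-byte
   counts left by popcntb, xor-folding the halves together preserves
   the low parity bit, and the final AND with 1 extracts it.  */
#if 0
#include <stdint.h>
static uint32_t
example_parity32 (uint32_t per_byte_counts)
{
  uint32_t t = per_byte_counts;
  t ^= t >> 16;   /* fold upper half onto lower */
  t ^= t >> 8;    /* fold the remaining byte pair */
  return t & 1;   /* parity of the original word */
}
#endif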
36412 /* Expand an Altivec constant permutation for little endian mode.
36413 There are two issues: First, the two input operands must be
36414 swapped so that together they form a double-wide array in LE
36415 order. Second, the vperm instruction has surprising behavior
36416 in LE mode: it interprets the elements of the source vectors
36417 in BE mode ("left to right") and interprets the elements of
36418 the destination vector in LE mode ("right to left"). To
36419 correct for this, we must subtract each element of the permute
36420 control vector from 31.
36422 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
36423 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
36424 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
36425 serve as the permute control vector. Then, in BE mode,
36427 vperm 9,10,11,12
36429 places the desired result in vr9. However, in LE mode the
36430 vector contents will be
36432 vr10 = 00000003 00000002 00000001 00000000
36433 vr11 = 00000007 00000006 00000005 00000004
36435 The result of the vperm using the same permute control vector is
36437 vr9 = 05000000 07000000 01000000 03000000
36439 That is, the leftmost 4 bytes of vr10 are interpreted as the
36440 source for the rightmost 4 bytes of vr9, and so on.
36442 If we change the permute control vector to
36444 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
36446 and issue
36448 vperm 9,11,10,12
36450 we get the desired
36452 vr9 = 00000006 00000004 00000002 00000000. */
36454 void
36455 altivec_expand_vec_perm_const_le (rtx operands[4])
36457 unsigned int i;
36458 rtx perm[16];
36459 rtx constv, unspec;
36460 rtx target = operands[0];
36461 rtx op0 = operands[1];
36462 rtx op1 = operands[2];
36463 rtx sel = operands[3];
36465 /* Unpack and adjust the constant selector. */
36466 for (i = 0; i < 16; ++i)
36468 rtx e = XVECEXP (sel, 0, i);
36469 unsigned int elt = 31 - (INTVAL (e) & 31);
36470 perm[i] = GEN_INT (elt);
36473 /* Expand to a permute, swapping the inputs and using the
36474 adjusted selector. */
36475 if (!REG_P (op0))
36476 op0 = force_reg (V16QImode, op0);
36477 if (!REG_P (op1))
36478 op1 = force_reg (V16QImode, op1);
36480 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
36481 constv = force_reg (V16QImode, constv);
36482 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
36483 UNSPEC_VPERM);
36484 if (!REG_P (target))
36486 rtx tmp = gen_reg_rtx (V16QImode);
36487 emit_move_insn (tmp, unspec);
36488 unspec = tmp;
36491 emit_move_insn (target, unspec);
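/* A minimal sketch of the selector adjustment above: every element of
   the permute control vector is replaced by 31 minus its value, and
   the two input operands are swapped when the vperm is emitted.  */
#if 0
static void
example_adjust_selector (unsigned char sel[16])
{
  for (int i = 0; i < 16; i++)
    sel[i] = 31 - (sel[i] & 31);
}
#endif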
36494 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
36495 permute control vector. But here it's not a constant, so we must
36496 generate a vector NAND or NOR to do the adjustment. */
36498 void
36499 altivec_expand_vec_perm_le (rtx operands[4])
36501 rtx notx, iorx, unspec;
36502 rtx target = operands[0];
36503 rtx op0 = operands[1];
36504 rtx op1 = operands[2];
36505 rtx sel = operands[3];
36506 rtx tmp = target;
36507 rtx norreg = gen_reg_rtx (V16QImode);
36508 machine_mode mode = GET_MODE (target);
36510 /* Get everything in regs so the pattern matches. */
36511 if (!REG_P (op0))
36512 op0 = force_reg (mode, op0);
36513 if (!REG_P (op1))
36514 op1 = force_reg (mode, op1);
36515 if (!REG_P (sel))
36516 sel = force_reg (V16QImode, sel);
36517 if (!REG_P (target))
36518 tmp = gen_reg_rtx (mode);
36520 if (TARGET_P9_VECTOR)
36522 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
36523 UNSPEC_VPERMR);
36525 else
36527 /* Invert the selector with a VNAND if available, else a VNOR.
36528 The VNAND is preferred for future fusion opportunities. */
36529 notx = gen_rtx_NOT (V16QImode, sel);
36530 iorx = (TARGET_P8_VECTOR
36531 ? gen_rtx_IOR (V16QImode, notx, notx)
36532 : gen_rtx_AND (V16QImode, notx, notx));
36533 emit_insn (gen_rtx_SET (norreg, iorx));
36535 /* Permute with operands reversed and adjusted selector. */
36536 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
36537 UNSPEC_VPERM);
36540 /* Copy into target, possibly by way of a register. */
36541 if (!REG_P (target))
36543 emit_move_insn (tmp, unspec);
36544 unspec = tmp;
36547 emit_move_insn (target, unspec);
36550 /* Expand an Altivec constant permutation. Return true if we match
36551 an efficient implementation; false to fall back to VPERM. */
36553 bool
36554 altivec_expand_vec_perm_const (rtx operands[4])
36556 struct altivec_perm_insn {
36557 HOST_WIDE_INT mask;
36558 enum insn_code impl;
36559 unsigned char perm[16];
36561 static const struct altivec_perm_insn patterns[] = {
36562 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
36563 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
36564 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
36565 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
36566 { OPTION_MASK_ALTIVEC,
36567 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
36568 : CODE_FOR_altivec_vmrglb_direct),
36569 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
36570 { OPTION_MASK_ALTIVEC,
36571 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
36572 : CODE_FOR_altivec_vmrglh_direct),
36573 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
36574 { OPTION_MASK_ALTIVEC,
36575 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
36576 : CODE_FOR_altivec_vmrglw_direct),
36577 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
36578 { OPTION_MASK_ALTIVEC,
36579 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
36580 : CODE_FOR_altivec_vmrghb_direct),
36581 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
36582 { OPTION_MASK_ALTIVEC,
36583 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
36584 : CODE_FOR_altivec_vmrghh_direct),
36585 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
36586 { OPTION_MASK_ALTIVEC,
36587 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
36588 : CODE_FOR_altivec_vmrghw_direct),
36589 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
36590 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
36591 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
36592 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
36593 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
36596 unsigned int i, j, elt, which;
36597 unsigned char perm[16];
36598 rtx target, op0, op1, sel, x;
36599 bool one_vec;
36601 target = operands[0];
36602 op0 = operands[1];
36603 op1 = operands[2];
36604 sel = operands[3];
36606 /* Unpack the constant selector. */
36607 for (i = which = 0; i < 16; ++i)
36609 rtx e = XVECEXP (sel, 0, i);
36610 elt = INTVAL (e) & 31;
36611 which |= (elt < 16 ? 1 : 2);
36612 perm[i] = elt;
36615 /* Simplify the constant selector based on operands. */
36616 switch (which)
36618 default:
36619 gcc_unreachable ();
36621 case 3:
36622 one_vec = false;
36623 if (!rtx_equal_p (op0, op1))
36624 break;
36625 /* FALLTHRU */
36627 case 2:
36628 for (i = 0; i < 16; ++i)
36629 perm[i] &= 15;
36630 op0 = op1;
36631 one_vec = true;
36632 break;
36634 case 1:
36635 op1 = op0;
36636 one_vec = true;
36637 break;
36640 /* Look for splat patterns. */
36641 if (one_vec)
36643 elt = perm[0];
36645 for (i = 0; i < 16; ++i)
36646 if (perm[i] != elt)
36647 break;
36648 if (i == 16)
36650 if (!BYTES_BIG_ENDIAN)
36651 elt = 15 - elt;
36652 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
36653 return true;
36656 if (elt % 2 == 0)
36658 for (i = 0; i < 16; i += 2)
36659 if (perm[i] != elt || perm[i + 1] != elt + 1)
36660 break;
36661 if (i == 16)
36663 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
36664 x = gen_reg_rtx (V8HImode);
36665 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
36666 GEN_INT (field)));
36667 emit_move_insn (target, gen_lowpart (V16QImode, x));
36668 return true;
36672 if (elt % 4 == 0)
36674 for (i = 0; i < 16; i += 4)
36675 if (perm[i] != elt
36676 || perm[i + 1] != elt + 1
36677 || perm[i + 2] != elt + 2
36678 || perm[i + 3] != elt + 3)
36679 break;
36680 if (i == 16)
36682 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
36683 x = gen_reg_rtx (V4SImode);
36684 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
36685 GEN_INT (field)));
36686 emit_move_insn (target, gen_lowpart (V16QImode, x));
36687 return true;
36692 /* Look for merge and pack patterns. */
36693 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
36695 bool swapped;
36697 if ((patterns[j].mask & rs6000_isa_flags) == 0)
36698 continue;
36700 elt = patterns[j].perm[0];
36701 if (perm[0] == elt)
36702 swapped = false;
36703 else if (perm[0] == elt + 16)
36704 swapped = true;
36705 else
36706 continue;
36707 for (i = 1; i < 16; ++i)
36709 elt = patterns[j].perm[i];
36710 if (swapped)
36711 elt = (elt >= 16 ? elt - 16 : elt + 16);
36712 else if (one_vec && elt >= 16)
36713 elt -= 16;
36714 if (perm[i] != elt)
36715 break;
36717 if (i == 16)
36719 enum insn_code icode = patterns[j].impl;
36720 machine_mode omode = insn_data[icode].operand[0].mode;
36721 machine_mode imode = insn_data[icode].operand[1].mode;
36723 /* For little-endian, don't use vpkuwum and vpkuhum if the
36724 underlying vector type is not V4SI and V8HI, respectively.
36725 For example, using vpkuwum with a V8HI picks up the even
36726 halfwords (BE numbering) when the even halfwords (LE
36727 numbering) are what we need. */
36728 if (!BYTES_BIG_ENDIAN
36729 && icode == CODE_FOR_altivec_vpkuwum_direct
36730 && ((GET_CODE (op0) == REG
36731 && GET_MODE (op0) != V4SImode)
36732 || (GET_CODE (op0) == SUBREG
36733 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
36734 continue;
36735 if (!BYTES_BIG_ENDIAN
36736 && icode == CODE_FOR_altivec_vpkuhum_direct
36737 && ((GET_CODE (op0) == REG
36738 && GET_MODE (op0) != V8HImode)
36739 || (GET_CODE (op0) == SUBREG
36740 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
36741 continue;
36743 /* For little-endian, the two input operands must be swapped
36744 (or swapped back) to ensure proper right-to-left numbering
36745 from 0 to 2N-1. */
36746 if (swapped ^ !BYTES_BIG_ENDIAN)
36747 std::swap (op0, op1);
36748 if (imode != V16QImode)
36750 op0 = gen_lowpart (imode, op0);
36751 op1 = gen_lowpart (imode, op1);
36753 if (omode == V16QImode)
36754 x = target;
36755 else
36756 x = gen_reg_rtx (omode);
36757 emit_insn (GEN_FCN (icode) (x, op0, op1));
36758 if (omode != V16QImode)
36759 emit_move_insn (target, gen_lowpart (V16QImode, x));
36760 return true;
36764 if (!BYTES_BIG_ENDIAN)
36766 altivec_expand_vec_perm_const_le (operands);
36767 return true;
36770 return false;
36773 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
36774 Return true if we match an efficient implementation. */
36776 static bool
36777 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
36778 unsigned char perm0, unsigned char perm1)
36780 rtx x;
36782 /* If both selectors come from the same operand, fold to single op. */
36783 if ((perm0 & 2) == (perm1 & 2))
36785 if (perm0 & 2)
36786 op0 = op1;
36787 else
36788 op1 = op0;
36790 /* If both operands are equal, fold to simpler permutation. */
36791 if (rtx_equal_p (op0, op1))
36793 perm0 = perm0 & 1;
36794 perm1 = (perm1 & 1) + 2;
36796 /* If the first selector comes from the second operand, swap. */
36797 else if (perm0 & 2)
36799 if (perm1 & 2)
36800 return false;
36801 perm0 -= 2;
36802 perm1 += 2;
36803 std::swap (op0, op1);
36805 /* If the second selector does not come from the second operand, fail. */
36806 else if ((perm1 & 2) == 0)
36807 return false;
36809 /* Success! */
36810 if (target != NULL)
36812 machine_mode vmode, dmode;
36813 rtvec v;
36815 vmode = GET_MODE (target);
36816 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
36817 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
36818 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
36819 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
36820 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
36821 emit_insn (gen_rtx_SET (target, x));
36823 return true;
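/* A scalar sketch of the element numbering used above for two-element
   vectors: selectors 0-1 pick from op0 and 2-3 pick from op1, which
   is what the folding logic normalizes before emitting the
   VEC_CONCAT/VEC_SELECT pair.  */
#if 0
static double
example_perm2 (const double op0[2], const double op1[2],
               unsigned char sel /* 0..3 */)
{
  return sel < 2 ? op0[sel] : op1[sel - 2];
}
#endif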
36826 bool
36827 rs6000_expand_vec_perm_const (rtx operands[4])
36829 rtx target, op0, op1, sel;
36830 unsigned char perm0, perm1;
36832 target = operands[0];
36833 op0 = operands[1];
36834 op1 = operands[2];
36835 sel = operands[3];
36837 /* Unpack the constant selector. */
36838 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
36839 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
36841 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
36844 /* Test whether a constant permutation is supported. */
36846 static bool
36847 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
36848 const unsigned char *sel)
36850 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
36851 if (TARGET_ALTIVEC)
36852 return true;
36854 /* Check for ps_merge* or evmerge* insns. */
36855 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
36856 || (TARGET_SPE && vmode == V2SImode))
36858 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
36859 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
36860 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
36863 return false;
36866 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
36868 static void
36869 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36870 machine_mode vmode, unsigned nelt, rtx perm[])
36872 machine_mode imode;
36873 rtx x;
36875 imode = vmode;
36876 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
36878 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
36879 imode = mode_for_vector (imode, nelt);
36882 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
36883 x = expand_vec_perm (vmode, op0, op1, x, target);
36884 if (x != target)
36885 emit_move_insn (target, x);
36888 /* Expand an extract even operation. */
36890 void
36891 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36893 machine_mode vmode = GET_MODE (target);
36894 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36895 rtx perm[16];
36897 for (i = 0; i < nelt; i++)
36898 perm[i] = GEN_INT (i * 2);
36900 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
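/* For example (derived from the loop above): with a V8HI target the
   selector built here is {0, 2, 4, 6, 8, 10, 12, 14}, i.e. the even
   elements of the OP0:OP1 concatenation. */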
36903 /* Expand a vector interleave operation. */
36905 void
36906 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36908 machine_mode vmode = GET_MODE (target);
36909 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36910 rtx perm[16];
36912 high = (highp ? 0 : nelt / 2);
36913 for (i = 0; i < nelt / 2; i++)
36915 perm[i * 2] = GEN_INT (i + high);
36916 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
36919 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
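/* For example (derived from the loop above): with a V4SI target,
   HIGHP == true builds the selector {0, 4, 1, 5} and HIGHP == false
   builds {2, 6, 3, 7}; "high" here is in big-endian element order. */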
36922 /* Scale a V2DF vector SRC by two to the power SCALE, and place the result in TGT. */
36923 void
36924 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36926 HOST_WIDE_INT hwi_scale (scale);
36927 REAL_VALUE_TYPE r_pow;
36928 rtvec v = rtvec_alloc (2);
36929 rtx elt;
36930 rtx scale_vec = gen_reg_rtx (V2DFmode);
36931 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36932 elt = const_double_from_real_value (r_pow, DFmode);
36933 RTVEC_ELT (v, 0) = elt;
36934 RTVEC_ELT (v, 1) = elt;
36935 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36936 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
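/* Illustrative use: rs6000_scale_v2df (t, s, 3) builds the constant
   vector {8.0, 8.0} and emits t = s * {8.0, 8.0}, scaling both lanes
   of S by 2^3. */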
36939 /* Return an RTX representing where to find the function value of a
36940 function returning MODE. */
36941 static rtx
36942 rs6000_complex_function_value (machine_mode mode)
36944 unsigned int regno;
36945 rtx r1, r2;
36946 machine_mode inner = GET_MODE_INNER (mode);
36947 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36949 if (TARGET_FLOAT128_TYPE
36950 && (mode == KCmode
36951 || (mode == TCmode && TARGET_IEEEQUAD)))
36952 regno = ALTIVEC_ARG_RETURN;
36954 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36955 regno = FP_ARG_RETURN;
36957 else
36959 regno = GP_ARG_RETURN;
36961 /* 32-bit is OK since it'll go in r3/r4. */
36962 if (TARGET_32BIT && inner_bytes >= 4)
36963 return gen_rtx_REG (mode, regno);
36966 if (inner_bytes >= 8)
36967 return gen_rtx_REG (mode, regno);
36969 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36970 const0_rtx);
36971 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36972 GEN_INT (inner_bytes));
36973 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
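/* Example (illustrative): a complex float (SCmode, 4-byte parts) with
   hard float takes the PARALLEL path above, placing the real part in
   f1 at byte 0 and the imaginary part in f2 at byte 4; a complex
   double passes the inner_bytes >= 8 test and is returned directly as
   (reg:DC f1). */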
36976 /* Return an rtx describing a return value of MODE as a PARALLEL
36977 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36978 stride REG_STRIDE. */
36980 static rtx
36981 rs6000_parallel_return (machine_mode mode,
36982 int n_elts, machine_mode elt_mode,
36983 unsigned int regno, unsigned int reg_stride)
36985 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36987 int i;
36988 for (i = 0; i < n_elts; i++)
36990 rtx r = gen_rtx_REG (elt_mode, regno);
36991 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36992 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36993 regno += reg_stride;
36996 return par;
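/* Example (illustrative): rs6000_parallel_return (DImode, 2, SImode,
   GP_ARG_RETURN, 1) describes a 64-bit value split across r3/r4:
   (parallel [(expr_list (reg:SI 3) (const_int 0))
   (expr_list (reg:SI 4) (const_int 4))]). */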
36999 /* Target hook for TARGET_FUNCTION_VALUE.
37001 On the SPE, both FPs and vectors are returned in r3.
37003 On RS/6000 an integer value is in r3 and a floating-point value is in
37004 fp1, unless -msoft-float. */
37006 static rtx
37007 rs6000_function_value (const_tree valtype,
37008 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
37009 bool outgoing ATTRIBUTE_UNUSED)
37011 machine_mode mode;
37012 unsigned int regno;
37013 machine_mode elt_mode;
37014 int n_elts;
37016 /* Special handling for structs in darwin64. */
37017 if (TARGET_MACHO
37018 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
37020 CUMULATIVE_ARGS valcum;
37021 rtx valret;
37023 valcum.words = 0;
37024 valcum.fregno = FP_ARG_MIN_REG;
37025 valcum.vregno = ALTIVEC_ARG_MIN_REG;
37026 /* Do a trial code generation as if this were going to be passed as
37027 an argument; if any part goes in memory, we return NULL. */
37028 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
37029 if (valret)
37030 return valret;
37031 /* Otherwise fall through to standard ABI rules. */
37034 mode = TYPE_MODE (valtype);
37036 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
37037 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
37039 int first_reg, n_regs;
37041 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
37043 /* _Decimal128 must use even/odd register pairs. */
37044 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37045 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
37047 else
37049 first_reg = ALTIVEC_ARG_RETURN;
37050 n_regs = 1;
37053 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
37056 /* Some return value types need to be split for -mpowerpc64 with the 32-bit ABI. */
37057 if (TARGET_32BIT && TARGET_POWERPC64)
37058 switch (mode)
37060 default:
37061 break;
37062 case DImode:
37063 case SCmode:
37064 case DCmode:
37065 case TCmode:
37066 int count = GET_MODE_SIZE (mode) / 4;
37067 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
37070 if ((INTEGRAL_TYPE_P (valtype)
37071 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
37072 || POINTER_TYPE_P (valtype))
37073 mode = TARGET_32BIT ? SImode : DImode;
37075 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37076 /* _Decimal128 must use an even/odd register pair. */
37077 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37078 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
37079 && !FLOAT128_VECTOR_P (mode)
37080 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
37081 regno = FP_ARG_RETURN;
37082 else if (TREE_CODE (valtype) == COMPLEX_TYPE
37083 && targetm.calls.split_complex_arg)
37084 return rs6000_complex_function_value (mode);
37085 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37086 return register is used in both cases, and we won't see V2DImode/V2DFmode
37087 for pure altivec, combine the two cases. */
37088 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
37089 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
37090 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37091 regno = ALTIVEC_ARG_RETURN;
37092 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37093 && (mode == DFmode || mode == DCmode
37094 || FLOAT128_IBM_P (mode) || mode == TCmode))
37095 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37096 else
37097 regno = GP_ARG_RETURN;
37099 return gen_rtx_REG (mode, regno);
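/* Example (illustrative): under ELFv2, "struct { double a, b; }" is a
   homogeneous aggregate with ELT_MODE == DFmode and N_ELTS == 2, so
   the code above returns a PARALLEL placing the members in f1/f2. */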
37102 /* Define how to find the value returned by a library function
37103 assuming the value has mode MODE. */
37104 rtx
37105 rs6000_libcall_value (machine_mode mode)
37107 unsigned int regno;
37109 /* A long long return value needs to be split for -mpowerpc64 with the 32-bit ABI. */
37110 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
37111 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
37113 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37114 /* _Decimal128 must use an even/odd register pair. */
37115 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37116 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
37117 && TARGET_HARD_FLOAT && TARGET_FPRS
37118 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
37119 regno = FP_ARG_RETURN;
37120 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37121 return register is used in both cases, and we won't see V2DImode/V2DFmode
37122 for pure altivec, combine the two cases. */
37123 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
37124 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
37125 regno = ALTIVEC_ARG_RETURN;
37126 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
37127 return rs6000_complex_function_value (mode);
37128 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37129 && (mode == DFmode || mode == DCmode
37130 || FLOAT128_IBM_P (mode) || mode == TCmode))
37131 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37132 else
37133 regno = GP_ARG_RETURN;
37135 return gen_rtx_REG (mode, regno);
37139 /* Return true if we use LRA instead of the reload pass. */
37140 static bool
37141 rs6000_lra_p (void)
37143 return TARGET_LRA;
37146 /* Given FROM and TO register numbers, say whether this elimination is allowed.
37147 Frame pointer elimination is automatically handled.
37149 For the RS/6000, if frame pointer elimination is being done, we would like
37150 to convert ap into fp, not sp.
37152 We need r30 if -mminimal-toc was specified, and there are constant pool
37153 references. */
37155 static bool
37156 rs6000_can_eliminate (const int from, const int to)
37158 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
37159 ? ! frame_pointer_needed
37160 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
37161 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
37162 : true);
37165 /* Define the offset between two registers, FROM to be eliminated and its
37166 replacement TO, at the start of a routine. */
37167 HOST_WIDE_INT
37168 rs6000_initial_elimination_offset (int from, int to)
37170 rs6000_stack_t *info = rs6000_stack_info ();
37171 HOST_WIDE_INT offset;
37173 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37174 offset = info->push_p ? 0 : -info->total_size;
37175 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37177 offset = info->push_p ? 0 : -info->total_size;
37178 if (FRAME_GROWS_DOWNWARD)
37179 offset += info->fixed_size + info->vars_size + info->parm_size;
37181 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37182 offset = FRAME_GROWS_DOWNWARD
37183 ? info->fixed_size + info->vars_size + info->parm_size
37184 : 0;
37185 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37186 offset = info->total_size;
37187 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37188 offset = info->push_p ? info->total_size : 0;
37189 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
37190 offset = 0;
37191 else
37192 gcc_unreachable ();
37194 return offset;
37197 static rtx
37198 rs6000_dwarf_register_span (rtx reg)
37200 rtx parts[8];
37201 int i, words;
37202 unsigned regno = REGNO (reg);
37203 machine_mode mode = GET_MODE (reg);
37205 if (TARGET_SPE
37206 && regno < 32
37207 && (SPE_VECTOR_MODE (GET_MODE (reg))
37208 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
37209 && mode != SFmode && mode != SDmode && mode != SCmode)))
37210 ;
37211 else
37212 return NULL_RTX;
37214 regno = REGNO (reg);
37216 /* The duality of the SPE register size wreaks all kinds of havoc.
37217 This is a way of distinguishing r0 in 32-bits from r0 in
37218 64-bits. */
37219 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
37220 gcc_assert (words <= 4);
37221 for (i = 0; i < words; i++, regno++)
37223 if (BYTES_BIG_ENDIAN)
37225 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37226 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
37228 else
37230 parts[2 * i] = gen_rtx_REG (SImode, regno);
37231 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37235 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
37238 /* Fill in sizes for SPE register high parts in table used by unwinder. */
37240 static void
37241 rs6000_init_dwarf_reg_sizes_extra (tree address)
37243 if (TARGET_SPE)
37245 int i;
37246 machine_mode mode = TYPE_MODE (char_type_node);
37247 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37248 rtx mem = gen_rtx_MEM (BLKmode, addr);
37249 rtx value = gen_int_mode (4, mode);
37251 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
37253 int column = DWARF_REG_TO_UNWIND_COLUMN
37254 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37255 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37257 emit_move_insn (adjust_address (mem, mode, offset), value);
37261 if (TARGET_MACHO && ! TARGET_ALTIVEC)
37263 int i;
37264 machine_mode mode = TYPE_MODE (char_type_node);
37265 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37266 rtx mem = gen_rtx_MEM (BLKmode, addr);
37267 rtx value = gen_int_mode (16, mode);
37269 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
37270 The unwinder still needs to know the size of Altivec registers. */
37272 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
37274 int column = DWARF_REG_TO_UNWIND_COLUMN
37275 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37276 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37278 emit_move_insn (adjust_address (mem, mode, offset), value);
37283 /* Map internal gcc register numbers to debug format register numbers.
37284 FORMAT specifies the type of debug register number to use:
37285 0 -- debug information, except for frame-related sections
37286 1 -- DWARF .debug_frame section
37287 2 -- DWARF .eh_frame section */
37289 unsigned int
37290 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
37292 /* We never use the GCC internal number for SPE high registers.
37293 Those are mapped to the 1200..1231 range for all debug formats. */
37294 if (SPE_HIGH_REGNO_P (regno))
37295 return regno - FIRST_SPE_HIGH_REGNO + 1200;
37297 /* Except for the above, we use the internal number for non-DWARF
37298 debug information, and also for .eh_frame. */
37299 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
37300 return regno;
37302 /* On some platforms, we use the standard DWARF register
37303 numbering for .debug_info and .debug_frame. */
37304 #ifdef RS6000_USE_DWARF_NUMBERING
37305 if (regno <= 63)
37306 return regno;
37307 if (regno == LR_REGNO)
37308 return 108;
37309 if (regno == CTR_REGNO)
37310 return 109;
37311 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
37312 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
37313 The actual code emitted saves the whole of CR, so we map CR2_REGNO
37314 to the DWARF reg for CR. */
37315 if (format == 1 && regno == CR2_REGNO)
37316 return 64;
37317 if (CR_REGNO_P (regno))
37318 return regno - CR0_REGNO + 86;
37319 if (regno == CA_REGNO)
37320 return 101; /* XER */
37321 if (ALTIVEC_REGNO_P (regno))
37322 return regno - FIRST_ALTIVEC_REGNO + 1124;
37323 if (regno == VRSAVE_REGNO)
37324 return 356;
37325 if (regno == VSCR_REGNO)
37326 return 67;
37327 if (regno == SPE_ACC_REGNO)
37328 return 99;
37329 if (regno == SPEFSCR_REGNO)
37330 return 612;
37331 #endif
37332 return regno;
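/* Examples (derived from the mapping above, assuming
   RS6000_USE_DWARF_NUMBERING): GPRs and FPRs keep their 0..63 numbers,
   LR becomes 108, CTR 109, CR fields cr0..cr7 become 86..93 (except
   that in .debug_frame CR2 stands in for the whole CR as 64), and
   AltiVec regs v0..v31 become 1124..1155. */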
37335 /* Target hook for eh_return_filter_mode. */
37336 static machine_mode
37337 rs6000_eh_return_filter_mode (void)
37339 return TARGET_32BIT ? SImode : word_mode;
37342 /* Target hook for scalar_mode_supported_p. */
37343 static bool
37344 rs6000_scalar_mode_supported_p (machine_mode mode)
37346 /* -m32 does not support TImode. This is the default, from
37347 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
37348 same ABI as for -m32. But default_scalar_mode_supported_p allows
37349 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
37350 for -mpowerpc64. */
37351 if (TARGET_32BIT && mode == TImode)
37352 return false;
37354 if (DECIMAL_FLOAT_MODE_P (mode))
37355 return default_decimal_float_supported_p ();
37356 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
37357 return true;
37358 else
37359 return default_scalar_mode_supported_p (mode);
37362 /* Target hook for vector_mode_supported_p. */
37363 static bool
37364 rs6000_vector_mode_supported_p (machine_mode mode)
37367 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
37368 return true;
37370 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
37371 return true;
37373 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
37374 128-bit, the compiler might try to widen IEEE 128-bit to IBM
37375 double-double. */
37376 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
37377 return true;
37379 else
37380 return false;
37383 /* Target hook for floatn_mode. */
37384 static machine_mode
37385 rs6000_floatn_mode (int n, bool extended)
37387 if (extended)
37389 switch (n)
37391 case 32:
37392 return DFmode;
37394 case 64:
37395 if (TARGET_FLOAT128_KEYWORD)
37396 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37397 else
37398 return VOIDmode;
37400 case 128:
37401 return VOIDmode;
37403 default:
37404 /* Those are the only valid _FloatNx types. */
37405 gcc_unreachable ();
37408 else
37410 switch (n)
37412 case 32:
37413 return SFmode;
37415 case 64:
37416 return DFmode;
37418 case 128:
37419 if (TARGET_FLOAT128_KEYWORD)
37420 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37421 else
37422 return VOIDmode;
37424 default:
37425 return VOIDmode;
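/* Summary (derived from the cases above): _Float32 -> SFmode,
   _Float64 -> DFmode, _Float32x -> DFmode; _Float128 and _Float64x
   map to TFmode when TFmode is IEEE, else KFmode, and only with
   -mfloat128; everything else yields VOIDmode. */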
37431 /* Target hook for c_mode_for_suffix. */
37432 static machine_mode
37433 rs6000_c_mode_for_suffix (char suffix)
37435 if (TARGET_FLOAT128_TYPE)
37437 if (suffix == 'q' || suffix == 'Q')
37438 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37440 /* At the moment, we are not defining a suffix for IBM extended double.
37441 If/when the default for -mabi=ieeelongdouble is changed, and we want
37442 to support __ibm128 constants in legacy library code, we may need to
37443 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
37444 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
37445 __float80 constants. */
37448 return VOIDmode;
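/* Illustrative use from user code (assumes -mfloat128):

   __float128 q = 1.0q;  /- 'q'/'Q' suffix selects KFmode, or TFmode
   when long double is IEEE 128-bit -/

   As noted above, no suffix is provided for __ibm128 constants. */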
37451 /* Target hook for invalid_arg_for_unprototyped_fn. */
37452 static const char *
37453 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
37455 return (!rs6000_darwin64_abi
37456 && typelist == 0
37457 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
37458 && (funcdecl == NULL_TREE
37459 || (TREE_CODE (funcdecl) == FUNCTION_DECL
37460 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
37461 ? N_("AltiVec argument passed to unprototyped function")
37462 : NULL;
37465 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
37466 setup by using __stack_chk_fail_local hidden function instead of
37467 calling __stack_chk_fail directly. Otherwise it is better to call
37468 __stack_chk_fail directly. */
37470 static tree ATTRIBUTE_UNUSED
37471 rs6000_stack_protect_fail (void)
37473 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
37474 ? default_hidden_stack_protect_fail ()
37475 : default_external_stack_protect_fail ();
37478 void
37479 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
37480 int num_operands ATTRIBUTE_UNUSED)
37482 if (rs6000_warn_cell_microcode)
37484 const char *temp;
37485 int insn_code_number = recog_memoized (insn);
37486 location_t location = INSN_LOCATION (insn);
37488 /* Punt on insns we cannot recognize. */
37489 if (insn_code_number < 0)
37490 return;
37492 temp = get_insn_template (insn_code_number, insn);
37494 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
37495 warning_at (location, OPT_mwarn_cell_microcode,
37496 "emitting microcode insn %s\t[%s] #%d",
37497 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37498 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
37499 warning_at (location, OPT_mwarn_cell_microcode,
37500 "emitting conditional microcode insn %s\t[%s] #%d",
37501 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37505 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
37507 #if TARGET_ELF
37508 static unsigned HOST_WIDE_INT
37509 rs6000_asan_shadow_offset (void)
37511 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
37513 #endif
37515 /* Mask options that we want to support inside of attribute((target)) and
37516 #pragma GCC target operations. Note, we do not include things like
37517 64/32-bit, endianness, hard/soft floating point, etc. that would have
37518 different calling sequences. */
37520 struct rs6000_opt_mask {
37521 const char *name; /* option name */
37522 HOST_WIDE_INT mask; /* mask to set */
37523 bool invert; /* invert sense of mask */
37524 bool valid_target; /* option is a target option */
37527 static struct rs6000_opt_mask const rs6000_opt_masks[] =
37529 { "altivec", OPTION_MASK_ALTIVEC, false, true },
37530 { "cmpb", OPTION_MASK_CMPB, false, true },
37531 { "crypto", OPTION_MASK_CRYPTO, false, true },
37532 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
37533 { "dlmzb", OPTION_MASK_DLMZB, false, true },
37534 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
37535 false, true },
37536 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
37537 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
37538 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
37539 { "fprnd", OPTION_MASK_FPRND, false, true },
37540 { "hard-dfp", OPTION_MASK_DFP, false, true },
37541 { "htm", OPTION_MASK_HTM, false, true },
37542 { "isel", OPTION_MASK_ISEL, false, true },
37543 { "mfcrf", OPTION_MASK_MFCRF, false, true },
37544 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
37545 { "modulo", OPTION_MASK_MODULO, false, true },
37546 { "mulhw", OPTION_MASK_MULHW, false, true },
37547 { "multiple", OPTION_MASK_MULTIPLE, false, true },
37548 { "popcntb", OPTION_MASK_POPCNTB, false, true },
37549 { "popcntd", OPTION_MASK_POPCNTD, false, true },
37550 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
37551 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
37552 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
37553 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
37554 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
37555 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
37556 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
37557 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
37558 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
37559 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
37560 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
37561 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
37562 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
37563 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
37564 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
37565 { "string", OPTION_MASK_STRING, false, true },
37566 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
37567 { "update", OPTION_MASK_NO_UPDATE, true , true },
37568 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
37569 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
37570 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
37571 { "vsx", OPTION_MASK_VSX, false, true },
37572 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
37573 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
37574 #ifdef OPTION_MASK_64BIT
37575 #if TARGET_AIX_OS
37576 { "aix64", OPTION_MASK_64BIT, false, false },
37577 { "aix32", OPTION_MASK_64BIT, true, false },
37578 #else
37579 { "64", OPTION_MASK_64BIT, false, false },
37580 { "32", OPTION_MASK_64BIT, true, false },
37581 #endif
37582 #endif
37583 #ifdef OPTION_MASK_EABI
37584 { "eabi", OPTION_MASK_EABI, false, false },
37585 #endif
37586 #ifdef OPTION_MASK_LITTLE_ENDIAN
37587 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
37588 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
37589 #endif
37590 #ifdef OPTION_MASK_RELOCATABLE
37591 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
37592 #endif
37593 #ifdef OPTION_MASK_STRICT_ALIGN
37594 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
37595 #endif
37596 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
37597 { "string", OPTION_MASK_STRING, false, false },
37600 /* Builtin mask mapping for printing the flags. */
37601 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
37603 { "altivec", RS6000_BTM_ALTIVEC, false, false },
37604 { "vsx", RS6000_BTM_VSX, false, false },
37605 { "spe", RS6000_BTM_SPE, false, false },
37606 { "paired", RS6000_BTM_PAIRED, false, false },
37607 { "fre", RS6000_BTM_FRE, false, false },
37608 { "fres", RS6000_BTM_FRES, false, false },
37609 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
37610 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
37611 { "popcntd", RS6000_BTM_POPCNTD, false, false },
37612 { "cell", RS6000_BTM_CELL, false, false },
37613 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
37614 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
37615 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
37616 { "crypto", RS6000_BTM_CRYPTO, false, false },
37617 { "htm", RS6000_BTM_HTM, false, false },
37618 { "hard-dfp", RS6000_BTM_DFP, false, false },
37619 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
37620 { "long-double-128", RS6000_BTM_LDBL128, false, false },
37621 { "float128", RS6000_BTM_FLOAT128, false, false },
37624 /* Option variables that we want to support inside attribute((target)) and
37625 #pragma GCC target operations. */
37627 struct rs6000_opt_var {
37628 const char *name; /* option name */
37629 size_t global_offset; /* offset of the option in global_options. */
37630 size_t target_offset; /* offset of the option in target options. */
37633 static struct rs6000_opt_var const rs6000_opt_vars[] =
37635 { "friz",
37636 offsetof (struct gcc_options, x_TARGET_FRIZ),
37637 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
37638 { "avoid-indexed-addresses",
37639 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
37640 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
37641 { "paired",
37642 offsetof (struct gcc_options, x_rs6000_paired_float),
37643 offsetof (struct cl_target_option, x_rs6000_paired_float), },
37644 { "longcall",
37645 offsetof (struct gcc_options, x_rs6000_default_long_calls),
37646 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
37647 { "optimize-swaps",
37648 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
37649 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
37650 { "allow-movmisalign",
37651 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
37652 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
37653 { "allow-df-permute",
37654 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
37655 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
37656 { "sched-groups",
37657 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
37658 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
37659 { "always-hint",
37660 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
37661 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
37662 { "align-branch-targets",
37663 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
37664 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
37665 { "vectorize-builtins",
37666 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
37667 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
37668 { "tls-markers",
37669 offsetof (struct gcc_options, x_tls_markers),
37670 offsetof (struct cl_target_option, x_tls_markers), },
37671 { "sched-prolog",
37672 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37673 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37674 { "sched-epilog",
37675 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37676 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37677 { "gen-cell-microcode",
37678 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
37679 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
37680 { "warn-cell-microcode",
37681 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
37682 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
37685 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
37686 parsing. Return true if there were no errors. */
37688 static bool
37689 rs6000_inner_target_options (tree args, bool attr_p)
37691 bool ret = true;
37693 if (args == NULL_TREE)
37694 ;
37696 else if (TREE_CODE (args) == STRING_CST)
37698 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37699 char *q;
37701 while ((q = strtok (p, ",")) != NULL)
37703 bool error_p = false;
37704 bool not_valid_p = false;
37705 const char *cpu_opt = NULL;
37707 p = NULL;
37708 if (strncmp (q, "cpu=", 4) == 0)
37710 int cpu_index = rs6000_cpu_name_lookup (q+4);
37711 if (cpu_index >= 0)
37712 rs6000_cpu_index = cpu_index;
37713 else
37715 error_p = true;
37716 cpu_opt = q+4;
37719 else if (strncmp (q, "tune=", 5) == 0)
37721 int tune_index = rs6000_cpu_name_lookup (q+5);
37722 if (tune_index >= 0)
37723 rs6000_tune_index = tune_index;
37724 else
37726 error_p = true;
37727 cpu_opt = q+5;
37730 else
37732 size_t i;
37733 bool invert = false;
37734 char *r = q;
37736 error_p = true;
37737 if (strncmp (r, "no-", 3) == 0)
37739 invert = true;
37740 r += 3;
37743 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
37744 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
37746 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
37748 if (!rs6000_opt_masks[i].valid_target)
37749 not_valid_p = true;
37750 else
37752 error_p = false;
37753 rs6000_isa_flags_explicit |= mask;
37755 /* VSX needs altivec, so -mvsx automagically sets
37756 altivec and disables -mavoid-indexed-addresses. */
37757 if (!invert)
37759 if (mask == OPTION_MASK_VSX)
37761 mask |= OPTION_MASK_ALTIVEC;
37762 TARGET_AVOID_XFORM = 0;
37766 if (rs6000_opt_masks[i].invert)
37767 invert = !invert;
37769 if (invert)
37770 rs6000_isa_flags &= ~mask;
37771 else
37772 rs6000_isa_flags |= mask;
37774 break;
37777 if (error_p && !not_valid_p)
37779 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
37780 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
37782 size_t j = rs6000_opt_vars[i].global_offset;
37783 *((int *) ((char *)&global_options + j)) = !invert;
37784 error_p = false;
37785 not_valid_p = false;
37786 break;
37791 if (error_p)
37793 const char *eprefix, *esuffix;
37795 ret = false;
37796 if (attr_p)
37798 eprefix = "__attribute__((__target__(";
37799 esuffix = ")))";
37801 else
37803 eprefix = "#pragma GCC target ";
37804 esuffix = "";
37807 if (cpu_opt)
37808 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
37809 q, esuffix);
37810 else if (not_valid_p)
37811 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
37812 else
37813 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
37818 else if (TREE_CODE (args) == TREE_LIST)
37822 tree value = TREE_VALUE (args);
37823 if (value)
37825 bool ret2 = rs6000_inner_target_options (value, attr_p);
37826 if (!ret2)
37827 ret = false;
37829 args = TREE_CHAIN (args);
37831 while (args != NULL_TREE);
37834 else
37835 gcc_unreachable ();
37837 return ret;
37840 /* Print out the target options as a list for -mdebug=target. */
37842 static void
37843 rs6000_debug_target_options (tree args, const char *prefix)
37845 if (args == NULL_TREE)
37846 fprintf (stderr, "%s<NULL>", prefix);
37848 else if (TREE_CODE (args) == STRING_CST)
37850 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37851 char *q;
37853 while ((q = strtok (p, ",")) != NULL)
37855 p = NULL;
37856 fprintf (stderr, "%s\"%s\"", prefix, q);
37857 prefix = ", ";
37861 else if (TREE_CODE (args) == TREE_LIST)
37865 tree value = TREE_VALUE (args);
37866 if (value)
37868 rs6000_debug_target_options (value, prefix);
37869 prefix = ", ";
37871 args = TREE_CHAIN (args);
37873 while (args != NULL_TREE);
37876 else
37877 gcc_unreachable ();
37879 return;
37883 /* Hook to validate attribute((target("..."))). */
37885 static bool
37886 rs6000_valid_attribute_p (tree fndecl,
37887 tree ARG_UNUSED (name),
37888 tree args,
37889 int flags)
37891 struct cl_target_option cur_target;
37892 bool ret;
37893 tree old_optimize = build_optimization_node (&global_options);
37894 tree new_target, new_optimize;
37895 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37897 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
37899 if (TARGET_DEBUG_TARGET)
37901 tree tname = DECL_NAME (fndecl);
37902 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
37903 if (tname)
37904 fprintf (stderr, "function: %.*s\n",
37905 (int) IDENTIFIER_LENGTH (tname),
37906 IDENTIFIER_POINTER (tname));
37907 else
37908 fprintf (stderr, "function: unknown\n");
37910 fprintf (stderr, "args:");
37911 rs6000_debug_target_options (args, " ");
37912 fprintf (stderr, "\n");
37914 if (flags)
37915 fprintf (stderr, "flags: 0x%x\n", flags);
37917 fprintf (stderr, "--------------------\n");
37920 old_optimize = build_optimization_node (&global_options);
37921 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37923 /* If the function changed the optimization levels as well as setting target
37924 options, start with the optimizations specified. */
37925 if (func_optimize && func_optimize != old_optimize)
37926 cl_optimization_restore (&global_options,
37927 TREE_OPTIMIZATION (func_optimize));
37929 /* The target attributes may also change some optimization flags, so update
37930 the optimization options if necessary. */
37931 cl_target_option_save (&cur_target, &global_options);
37932 rs6000_cpu_index = rs6000_tune_index = -1;
37933 ret = rs6000_inner_target_options (args, true);
37935 /* Set up any additional state. */
37936 if (ret)
37938 ret = rs6000_option_override_internal (false);
37939 new_target = build_target_option_node (&global_options);
37941 else
37942 new_target = NULL;
37944 new_optimize = build_optimization_node (&global_options);
37946 if (!new_target)
37947 ret = false;
37949 else if (fndecl)
37951 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
37953 if (old_optimize != new_optimize)
37954 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
37957 cl_target_option_restore (&global_options, &cur_target);
37959 if (old_optimize != new_optimize)
37960 cl_optimization_restore (&global_options,
37961 TREE_OPTIMIZATION (old_optimize));
37963 return ret;
37967 /* Hook to validate the current #pragma GCC target and set the state, and
37968 update the macros based on what was changed. If ARGS is NULL, then
37969 POP_TARGET is used to reset the options. */
37971 bool
37972 rs6000_pragma_target_parse (tree args, tree pop_target)
37974 tree prev_tree = build_target_option_node (&global_options);
37975 tree cur_tree;
37976 struct cl_target_option *prev_opt, *cur_opt;
37977 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37978 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37980 if (TARGET_DEBUG_TARGET)
37982 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37983 fprintf (stderr, "args:");
37984 rs6000_debug_target_options (args, " ");
37985 fprintf (stderr, "\n");
37987 if (pop_target)
37989 fprintf (stderr, "pop_target:\n");
37990 debug_tree (pop_target);
37992 else
37993 fprintf (stderr, "pop_target: <NULL>\n");
37995 fprintf (stderr, "--------------------\n");
37998 if (! args)
38000 cur_tree = ((pop_target)
38001 ? pop_target
38002 : target_option_default_node);
38003 cl_target_option_restore (&global_options,
38004 TREE_TARGET_OPTION (cur_tree));
38006 else
38008 rs6000_cpu_index = rs6000_tune_index = -1;
38009 if (!rs6000_inner_target_options (args, false)
38010 || !rs6000_option_override_internal (false)
38011 || (cur_tree = build_target_option_node (&global_options))
38012 == NULL_TREE)
38014 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
38015 fprintf (stderr, "invalid pragma\n");
38017 return false;
38021 target_option_current_node = cur_tree;
38023 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
38024 change the macros that are defined. */
38025 if (rs6000_target_modify_macros_ptr)
38027 prev_opt = TREE_TARGET_OPTION (prev_tree);
38028 prev_bumask = prev_opt->x_rs6000_builtin_mask;
38029 prev_flags = prev_opt->x_rs6000_isa_flags;
38031 cur_opt = TREE_TARGET_OPTION (cur_tree);
38032 cur_flags = cur_opt->x_rs6000_isa_flags;
38033 cur_bumask = cur_opt->x_rs6000_builtin_mask;
38035 diff_bumask = (prev_bumask ^ cur_bumask);
38036 diff_flags = (prev_flags ^ cur_flags);
38038 if ((diff_flags != 0) || (diff_bumask != 0))
38040 /* Delete old macros. */
38041 rs6000_target_modify_macros_ptr (false,
38042 prev_flags & diff_flags,
38043 prev_bumask & diff_bumask);
38045 /* Define new macros. */
38046 rs6000_target_modify_macros_ptr (true,
38047 cur_flags & diff_flags,
38048 cur_bumask & diff_bumask);
38052 return true;
38056 /* Remember the last target of rs6000_set_current_function. */
38057 static GTY(()) tree rs6000_previous_fndecl;
38059 /* Establish appropriate back-end context for processing the function
38060 FNDECL. The argument might be NULL to indicate processing at top
38061 level, outside of any function scope. */
38062 static void
38063 rs6000_set_current_function (tree fndecl)
38065 tree old_tree = (rs6000_previous_fndecl
38066 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
38067 : NULL_TREE);
38069 tree new_tree = (fndecl
38070 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
38071 : NULL_TREE);
38073 if (TARGET_DEBUG_TARGET)
38075 bool print_final = false;
38076 fprintf (stderr, "\n==================== rs6000_set_current_function");
38078 if (fndecl)
38079 fprintf (stderr, ", fndecl %s (%p)",
38080 (DECL_NAME (fndecl)
38081 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
38082 : "<unknown>"), (void *)fndecl);
38084 if (rs6000_previous_fndecl)
38085 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
38087 fprintf (stderr, "\n");
38088 if (new_tree)
38090 fprintf (stderr, "\nnew fndecl target specific options:\n");
38091 debug_tree (new_tree);
38092 print_final = true;
38095 if (old_tree)
38097 fprintf (stderr, "\nold fndecl target specific options:\n");
38098 debug_tree (old_tree);
38099 print_final = true;
38102 if (print_final)
38103 fprintf (stderr, "--------------------\n");
38106 /* Only change the context if the function changes. This hook is called
38107 several times in the course of compiling a function, and we don't want to
38108 slow things down too much or call target_reinit when it isn't safe. */
38109 if (fndecl && fndecl != rs6000_previous_fndecl)
38111 rs6000_previous_fndecl = fndecl;
38112 if (old_tree == new_tree)
38113 ;
38115 else if (new_tree && new_tree != target_option_default_node)
38117 cl_target_option_restore (&global_options,
38118 TREE_TARGET_OPTION (new_tree));
38119 if (TREE_TARGET_GLOBALS (new_tree))
38120 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38121 else
38122 TREE_TARGET_GLOBALS (new_tree)
38123 = save_target_globals_default_opts ();
38126 else if (old_tree && old_tree != target_option_default_node)
38128 new_tree = target_option_current_node;
38129 cl_target_option_restore (&global_options,
38130 TREE_TARGET_OPTION (new_tree));
38131 if (TREE_TARGET_GLOBALS (new_tree))
38132 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38133 else if (new_tree == target_option_default_node)
38134 restore_target_globals (&default_target_globals);
38135 else
38136 TREE_TARGET_GLOBALS (new_tree)
38137 = save_target_globals_default_opts ();
38143 /* Save the current options */
38145 static void
38146 rs6000_function_specific_save (struct cl_target_option *ptr,
38147 struct gcc_options *opts)
38149 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
38150 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
38153 /* Restore the current options */
38155 static void
38156 rs6000_function_specific_restore (struct gcc_options *opts,
38157 struct cl_target_option *ptr)
38160 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
38161 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
38162 (void) rs6000_option_override_internal (false);
38165 /* Print the current options */
38167 static void
38168 rs6000_function_specific_print (FILE *file, int indent,
38169 struct cl_target_option *ptr)
38171 rs6000_print_isa_options (file, indent, "Isa options set",
38172 ptr->x_rs6000_isa_flags);
38174 rs6000_print_isa_options (file, indent, "Isa options explicit",
38175 ptr->x_rs6000_isa_flags_explicit);
38178 /* Helper function to print the current isa or misc options on a line. */
38180 static void
38181 rs6000_print_options_internal (FILE *file,
38182 int indent,
38183 const char *string,
38184 HOST_WIDE_INT flags,
38185 const char *prefix,
38186 const struct rs6000_opt_mask *opts,
38187 size_t num_elements)
38189 size_t i;
38190 size_t start_column = 0;
38191 size_t cur_column;
38192 size_t max_column = 120;
38193 size_t prefix_len = strlen (prefix);
38194 size_t comma_len = 0;
38195 const char *comma = "";
38197 if (indent)
38198 start_column += fprintf (file, "%*s", indent, "");
38200 if (!flags)
38202 fprintf (file, DEBUG_FMT_S, string, "<none>");
38203 return;
38206 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
38208 /* Print the various mask options. */
38209 cur_column = start_column;
38210 for (i = 0; i < num_elements; i++)
38212 bool invert = opts[i].invert;
38213 const char *name = opts[i].name;
38214 const char *no_str = "";
38215 HOST_WIDE_INT mask = opts[i].mask;
38216 size_t len = comma_len + prefix_len + strlen (name);
38218 if (!invert)
38220 if ((flags & mask) == 0)
38222 no_str = "no-";
38223 len += sizeof ("no-") - 1;
38226 flags &= ~mask;
38229 else
38231 if ((flags & mask) != 0)
38233 no_str = "no-";
38234 len += sizeof ("no-") - 1;
38237 flags |= mask;
38240 cur_column += len;
38241 if (cur_column > max_column)
38243 fprintf (file, ", \\\n%*s", (int)start_column, "");
38244 cur_column = start_column + len;
38245 comma = "";
38248 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
38249 comma = ", ";
38250 comma_len = sizeof (", ") - 1;
38253 fputs ("\n", file);
38256 /* Helper function to print the current isa options on a line. */
38258 static void
38259 rs6000_print_isa_options (FILE *file, int indent, const char *string,
38260 HOST_WIDE_INT flags)
38262 rs6000_print_options_internal (file, indent, string, flags, "-m",
38263 &rs6000_opt_masks[0],
38264 ARRAY_SIZE (rs6000_opt_masks));
38267 static void
38268 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
38269 HOST_WIDE_INT flags)
38271 rs6000_print_options_internal (file, indent, string, flags, "",
38272 &rs6000_builtin_mask_names[0],
38273 ARRAY_SIZE (rs6000_builtin_mask_names));
38277 /* Hook to determine if one function can safely inline another. */
38279 static bool
38280 rs6000_can_inline_p (tree caller, tree callee)
38282 bool ret = false;
38283 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
38284 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
38286 /* If callee has no option attributes, then it is ok to inline. */
38287 if (!callee_tree)
38288 ret = true;
38290 /* If caller has no option attributes, but callee does then it is not ok to
38291 inline. */
38292 else if (!caller_tree)
38293 ret = false;
38295 else
38297 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
38298 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
38300 /* Callee's options should be a subset of the caller's, i.e. a vsx function
38301 can inline an altivec function but a non-vsx function can't inline a
38302 vsx function. */
38303 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
38304 == callee_opts->x_rs6000_isa_flags)
38305 ret = true;
38308 if (TARGET_DEBUG_TARGET)
38309 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
38310 (DECL_NAME (caller)
38311 ? IDENTIFIER_POINTER (DECL_NAME (caller))
38312 : "<unknown>"),
38313 (DECL_NAME (callee)
38314 ? IDENTIFIER_POINTER (DECL_NAME (callee))
38315 : "<unknown>"),
38316 (ret ? "can" : "cannot"));
38318 return ret;
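/* Illustrative consequence of the subset test above: a caller built
   with -mvsx (which implies altivec) may inline a target("altivec")
   callee, but an altivec-only caller may not inline a target("vsx")
   callee, since the callee's flags are not a subset of the caller's. */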
38321 /* Allocate a stack temp and fix up the address so it meets the particular
38322 memory requirements (either offsettable or REG+REG addressing). */
38324 rtx
38325 rs6000_allocate_stack_temp (machine_mode mode,
38326 bool offsettable_p,
38327 bool reg_reg_p)
38329 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
38330 rtx addr = XEXP (stack, 0);
38331 int strict_p = (reload_in_progress || reload_completed);
38333 if (!legitimate_indirect_address_p (addr, strict_p))
38335 if (offsettable_p
38336 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
38337 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38339 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
38340 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38343 return stack;
38346 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
38347 to such a form to deal with memory reference instructions like STFIWX that
38348 only take reg+reg addressing. */
38350 rtx
38351 rs6000_address_for_fpconvert (rtx x)
38353 int strict_p = (reload_in_progress || reload_completed);
38354 rtx addr;
38356 gcc_assert (MEM_P (x));
38357 addr = XEXP (x, 0);
38358 if (! legitimate_indirect_address_p (addr, strict_p)
38359 && ! legitimate_indexed_address_p (addr, strict_p))
38361 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
38363 rtx reg = XEXP (addr, 0);
38364 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
38365 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
38366 gcc_assert (REG_P (reg));
38367 emit_insn (gen_add3_insn (reg, reg, size_rtx));
38368 addr = reg;
38370 else if (GET_CODE (addr) == PRE_MODIFY)
38372 rtx reg = XEXP (addr, 0);
38373 rtx expr = XEXP (addr, 1);
38374 gcc_assert (REG_P (reg));
38375 gcc_assert (GET_CODE (expr) == PLUS);
38376 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
38377 addr = reg;
38380 x = replace_equiv_address (x, copy_addr_to_reg (addr));
38383 return x;
38386 /* Given a memory reference, if it is not in the form for altivec memory
38387 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
38388 convert to the altivec format. */
38390 rtx
38391 rs6000_address_for_altivec (rtx x)
38393 gcc_assert (MEM_P (x));
38394 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
38396 rtx addr = XEXP (x, 0);
38397 int strict_p = (reload_in_progress || reload_completed);
38399 if (!legitimate_indexed_address_p (addr, strict_p)
38400 && !legitimate_indirect_address_p (addr, strict_p))
38401 addr = copy_to_mode_reg (Pmode, addr);
38403 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
38404 x = change_address (x, GET_MODE (x), addr);
38407 return x;
38410 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
38412 On the RS/6000, all integer constants are acceptable, most won't be valid
38413 for particular insns, though. Only easy FP constants are acceptable. */
38415 static bool
38416 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
38418 if (TARGET_ELF && tls_referenced_p (x))
38419 return false;
38421 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
38422 || GET_MODE (x) == VOIDmode
38423 || (TARGET_POWERPC64 && mode == DImode)
38424 || easy_fp_constant (x, mode)
38425 || easy_vector_constant (x, mode));
38429 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
38431 static bool
38432 chain_already_loaded (rtx_insn *last)
38434 for (; last != NULL; last = PREV_INSN (last))
38436 if (NONJUMP_INSN_P (last))
38438 rtx patt = PATTERN (last);
38440 if (GET_CODE (patt) == SET)
38442 rtx lhs = XEXP (patt, 0);
38444 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
38445 return true;
38449 return false;
38452 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
38454 void
38455 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38457 const bool direct_call_p
38458 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
38459 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
38460 rtx toc_load = NULL_RTX;
38461 rtx toc_restore = NULL_RTX;
38462 rtx func_addr;
38463 rtx abi_reg = NULL_RTX;
38464 rtx call[4];
38465 int n_call;
38466 rtx insn;
38468 /* Handle longcall attributes. */
38469 if (INTVAL (cookie) & CALL_LONG)
38470 func_desc = rs6000_longcall_ref (func_desc);
38472 /* Handle indirect calls. */
38473 if (GET_CODE (func_desc) != SYMBOL_REF
38474 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
38476 /* Save the TOC into its reserved slot before the call,
38477 and prepare to restore it after the call. */
38478 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
38479 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
38480 rtx stack_toc_mem = gen_frame_mem (Pmode,
38481 gen_rtx_PLUS (Pmode, stack_ptr,
38482 stack_toc_offset));
38483 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38484 gen_rtvec (1, stack_toc_offset),
38485 UNSPEC_TOCSLOT);
38486 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38488 /* Can we optimize saving the TOC in the prologue or
38489 do we need to do it at every call? */
38490 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38491 cfun->machine->save_toc_in_prologue = true;
38492 else
38494 MEM_VOLATILE_P (stack_toc_mem) = 1;
38495 emit_move_insn (stack_toc_mem, toc_reg);
38498 if (DEFAULT_ABI == ABI_ELFv2)
38500 /* A function pointer in the ELFv2 ABI is just a plain address, but
38501 the ABI requires it to be loaded into r12 before the call. */
38502 func_addr = gen_rtx_REG (Pmode, 12);
38503 emit_move_insn (func_addr, func_desc);
38504 abi_reg = func_addr;
38506 else
38508 /* A function pointer under AIX is a pointer to a data area whose
38509 first word contains the actual address of the function, whose
38510 second word contains a pointer to its TOC, and whose third word
38511 contains a value to place in the static chain register (r11).
38512 Note that if we load the static chain, our "trampoline" need
38513 not have any executable code. */
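/* Illustrative 64-bit descriptor layout (Pmode == DImode):
   offset 0: entry-point address (loaded just below)
   offset 8: callee's TOC pointer
   offset 16: static chain value destined for r11 */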
38515 /* Load up address of the actual function. */
38516 func_desc = force_reg (Pmode, func_desc);
38517 func_addr = gen_reg_rtx (Pmode);
38518 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38520 /* Prepare to load the TOC of the called function. Note that the
38521 TOC load must happen immediately before the actual call so
38522 that unwinding the TOC registers works correctly. See the
38523 comment in frob_update_context. */
38524 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38525 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38526 gen_rtx_PLUS (Pmode, func_desc,
38527 func_toc_offset));
38528 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38530 /* If we have a static chain, load it up. But, if the call was
38531 originally direct, the 3rd word has not been written since no
38532 trampoline has been built, so we ought not to load it, lest we
38533 overwrite a static chain value. */
38534 if (!direct_call_p
38535 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38536 && !chain_already_loaded (get_current_sequence ()->next->last))
38538 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38539 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38540 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38541 gen_rtx_PLUS (Pmode, func_desc,
38542 func_sc_offset));
38543 emit_move_insn (sc_reg, func_sc_mem);
38544 abi_reg = sc_reg;
38548 else
38550 /* Direct calls use the TOC: for local calls, the callee will
38551 assume the TOC register is set; for non-local calls, the
38552 PLT stub needs the TOC register. */
38553 abi_reg = toc_reg;
38554 func_addr = func_desc;
38557 /* Create the call. */
38558 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38559 if (value != NULL_RTX)
38560 call[0] = gen_rtx_SET (value, call[0]);
38561 n_call = 1;
38563 if (toc_load)
38564 call[n_call++] = toc_load;
38565 if (toc_restore)
38566 call[n_call++] = toc_restore;
38568 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38570 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38571 insn = emit_call_insn (insn);
38573 /* Mention all registers defined by the ABI to hold information
38574 as uses in CALL_INSN_FUNCTION_USAGE. */
38575 if (abi_reg)
38576 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38579 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38581 void
38582 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38584 rtx call[2];
38585 rtx insn;
38587 gcc_assert (INTVAL (cookie) == 0);
38589 /* Create the call. */
38590 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38591 if (value != NULL_RTX)
38592 call[0] = gen_rtx_SET (value, call[0]);
38594 call[1] = simple_return_rtx;
38596 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38597 insn = emit_call_insn (insn);
38599 /* Note use of the TOC register. */
38600 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38603 /* Return whether we need to always update the saved TOC pointer when we update
38604 the stack pointer. */
38606 static bool
38607 rs6000_save_toc_in_prologue_p (void)
38609 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38612 #ifdef HAVE_GAS_HIDDEN
38613 # define USE_HIDDEN_LINKONCE 1
38614 #else
38615 # define USE_HIDDEN_LINKONCE 0
38616 #endif
38618 /* Fills in the label name that should be used for a 476 link stack thunk. */
38620 void
38621 get_ppc476_thunk_name (char name[32])
38623 gcc_assert (TARGET_LINK_STACK);
38625 if (USE_HIDDEN_LINKONCE)
38626 sprintf (name, "__ppc476.get_thunk");
38627 else
38628 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38631 /* This function emits the simple thunk routine that is used to preserve
38632 the link stack on the 476 cpu. */
38634 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38635 static void
38636 rs6000_code_end (void)
38638 char name[32];
38639 tree decl;
38641 if (!TARGET_LINK_STACK)
38642 return;
38644 get_ppc476_thunk_name (name);
38646 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38647 build_function_type_list (void_type_node, NULL_TREE));
38648 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38649 NULL_TREE, void_type_node);
38650 TREE_PUBLIC (decl) = 1;
38651 TREE_STATIC (decl) = 1;
38653 #if RS6000_WEAK
38654 if (USE_HIDDEN_LINKONCE)
38656 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38657 targetm.asm_out.unique_section (decl, 0);
38658 switch_to_section (get_named_section (decl, NULL, 0));
38659 DECL_WEAK (decl) = 1;
38660 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38661 targetm.asm_out.globalize_label (asm_out_file, name);
38662 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38663 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38665 else
38666 #endif
38668 switch_to_section (text_section);
38669 ASM_OUTPUT_LABEL (asm_out_file, name);
38672 DECL_INITIAL (decl) = make_node (BLOCK);
38673 current_function_decl = decl;
38674 allocate_struct_function (decl, false);
38675 init_function_start (decl);
38676 first_function_block_is_cold = false;
38677 /* Make sure unwind info is emitted for the thunk if needed. */
38678 final_start_function (emit_barrier (), asm_out_file, 1);
38680 fputs ("\tblr\n", asm_out_file);
38682 final_end_function ();
38683 init_insn_lengths ();
38684 free_after_compilation (cfun);
38685 set_cfun (NULL);
38686 current_function_decl = NULL;
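/* For illustration (not from the original source; section directives and
   flags are assumptions that depend on the assembler): in the
   hidden-linkonce case the emitted thunk is roughly

       .section .text.__ppc476.get_thunk,"axG",@progbits,__ppc476.get_thunk,comdat
       .weak   __ppc476.get_thunk
       .hidden __ppc476.get_thunk
   __ppc476.get_thunk:
       blr

   A bl to this thunk followed by its blr leaves the 476's branch link
   stack balanced while making the return address available in LR.  */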
38689 /* Add r30 to hard reg set if the prologue sets it up and it is not
38690 pic_offset_table_rtx. */
38692 static void
38693 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38695 if (!TARGET_SINGLE_PIC_BASE
38696 && TARGET_TOC
38697 && TARGET_MINIMAL_TOC
38698 && get_pool_size () != 0)
38699 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38700 if (cfun->machine->split_stack_argp_used)
38701 add_to_hard_reg_set (&set->set, Pmode, 12);
38705 /* Helper function for rs6000_split_logical to emit a logical instruction after
38706 splitting the operation into single GPR registers.
38708 DEST is the destination register.
38709 OP1 and OP2 are the input source registers.
38710 CODE is the base operation (AND, IOR, XOR, NOT).
38711 MODE is the machine mode.
38712 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38713 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38714 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38716 static void
38717 rs6000_split_logical_inner (rtx dest,
38718 rtx op1,
38719 rtx op2,
38720 enum rtx_code code,
38721 machine_mode mode,
38722 bool complement_final_p,
38723 bool complement_op1_p,
38724 bool complement_op2_p)
38726 rtx bool_rtx;
38728 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38729 if (op2 && GET_CODE (op2) == CONST_INT
38730 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38731 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38733 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38734 HOST_WIDE_INT value = INTVAL (op2) & mask;
38736 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38737 if (code == AND)
38739 if (value == 0)
38741 emit_insn (gen_rtx_SET (dest, const0_rtx));
38742 return;
38745 else if (value == mask)
38747 if (!rtx_equal_p (dest, op1))
38748 emit_insn (gen_rtx_SET (dest, op1));
38749 return;
38753 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38754 into separate ORI/ORIS or XORI/XORIS instructions. */
38755 else if (code == IOR || code == XOR)
38757 if (value == 0)
38759 if (!rtx_equal_p (dest, op1))
38760 emit_insn (gen_rtx_SET (dest, op1));
38761 return;
38766 if (code == AND && mode == SImode
38767 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38769 emit_insn (gen_andsi3 (dest, op1, op2));
38770 return;
38773 if (complement_op1_p)
38774 op1 = gen_rtx_NOT (mode, op1);
38776 if (complement_op2_p)
38777 op2 = gen_rtx_NOT (mode, op2);
38779 /* For canonical RTL, if only one arm is inverted it is the first. */
38780 if (!complement_op1_p && complement_op2_p)
38781 std::swap (op1, op2);
38783 bool_rtx = ((code == NOT)
38784 ? gen_rtx_NOT (mode, op1)
38785 : gen_rtx_fmt_ee (code, mode, op1, op2));
38787 if (complement_final_p)
38788 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38790 emit_insn (gen_rtx_SET (dest, bool_rtx));
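/* As a rough sketch (illustrative, not part of the original source), the
   complement flags above map onto RTL, and hence onto PowerPC
   instructions, as follows for CODE == AND:

     complement_op1_p     dest = ~op1 & op2    -> andc
     complement_op2_p     dest = op1 & ~op2    -> canonicalized by the
                          std::swap above to ~op2 & op1, again andc
     complement_final_p   dest = ~(op1 & op2)  -> nand

   Likewise IOR yields orc/nor and XOR yields eqv for the final
   complement.  */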
38793 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38794 operations are split immediately during RTL generation to allow for more
38795 optimizations of the AND/IOR/XOR.
38797 OPERANDS is an array containing the destination and two input operands.
38798 CODE is the base operation (AND, IOR, XOR, NOT).
38799 MODE is the machine mode.
38800 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38801 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38802 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38803 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38804 formation of the AND instructions. */
38806 static void
38807 rs6000_split_logical_di (rtx operands[3],
38808 enum rtx_code code,
38809 bool complement_final_p,
38810 bool complement_op1_p,
38811 bool complement_op2_p)
38813 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38814 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38815 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38816 enum hi_lo { hi = 0, lo = 1 };
38817 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38818 size_t i;
38820 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38821 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38822 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38823 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38825 if (code == NOT)
38826 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38827 else
38829 if (GET_CODE (operands[2]) != CONST_INT)
38831 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38832 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38834 else
38836 HOST_WIDE_INT value = INTVAL (operands[2]);
38837 HOST_WIDE_INT value_hi_lo[2];
38839 gcc_assert (!complement_final_p);
38840 gcc_assert (!complement_op1_p);
38841 gcc_assert (!complement_op2_p);
38843 value_hi_lo[hi] = value >> 32;
38844 value_hi_lo[lo] = value & lower_32bits;
38846 for (i = 0; i < 2; i++)
38848 HOST_WIDE_INT sub_value = value_hi_lo[i];
38850 if (sub_value & sign_bit)
38851 sub_value |= upper_32bits;
38853 op2_hi_lo[i] = GEN_INT (sub_value);
38855 /* If this is an AND instruction, check to see if we need to load
38856 the value into a register. */
38857 if (code == AND && sub_value != -1 && sub_value != 0
38858 && !and_operand (op2_hi_lo[i], SImode))
38859 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38864 for (i = 0; i < 2; i++)
38866 /* Split large IOR/XOR operations. */
38867 if ((code == IOR || code == XOR)
38868 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38869 && !complement_final_p
38870 && !complement_op1_p
38871 && !complement_op2_p
38872 && !logical_const_operand (op2_hi_lo[i], SImode))
38874 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38875 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38876 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38877 rtx tmp = gen_reg_rtx (SImode);
38879 /* Make sure the constant is sign extended. */
38880 if ((hi_16bits & sign_bit) != 0)
38881 hi_16bits |= upper_32bits;
38883 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38884 code, SImode, false, false, false);
38886 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38887 code, SImode, false, false, false);
38889 else
38890 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38891 code, SImode, complement_final_p,
38892 complement_op1_p, complement_op2_p);
38895 return;
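/* A sketch in standalone C (illustrative values, assuming the 64-bit
   HOST_WIDE_INT arithmetic used above) of how a 64-bit constant is split
   for the 32-bit DImode path, and how an out-of-range IOR/XOR half is
   split again into the 16-bit pieces applied as oris/xoris and ori/xori:

     long long value = 0x1234567890abcdefLL;
     long long hi = value >> 32;            // 0x12345678 (high half)
     long long lo = value & 0xffffffffLL;   // 0x90abcdef (low half)
     if (lo & 0x80000000LL)
       lo |= ~0xffffffffLL;                 // sign-extend the half
     long long hi16 = lo & 0xffff0000LL;    // oris/xoris immediate
     long long lo16 = lo & 0x0000ffffLL;    // ori/xori immediate
*/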
38898 /* Split the insns that make up boolean operations operating on multiple GPR
38899 registers. The boolean MD patterns ensure that the inputs either are
38900 exactly the same as the output registers, or there is no overlap.
38902 OPERANDS is an array containing the destination and two input operands.
38903 CODE is the base operation (AND, IOR, XOR, NOT).
38904 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38905 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38906 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38908 void
38909 rs6000_split_logical (rtx operands[3],
38910 enum rtx_code code,
38911 bool complement_final_p,
38912 bool complement_op1_p,
38913 bool complement_op2_p)
38915 machine_mode mode = GET_MODE (operands[0]);
38916 machine_mode sub_mode;
38917 rtx op0, op1, op2;
38918 int sub_size, regno0, regno1, nregs, i;
38920 /* If this is DImode, use the specialized version that can run before
38921 register allocation. */
38922 if (mode == DImode && !TARGET_POWERPC64)
38924 rs6000_split_logical_di (operands, code, complement_final_p,
38925 complement_op1_p, complement_op2_p);
38926 return;
38929 op0 = operands[0];
38930 op1 = operands[1];
38931 op2 = (code == NOT) ? NULL_RTX : operands[2];
38932 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38933 sub_size = GET_MODE_SIZE (sub_mode);
38934 regno0 = REGNO (op0);
38935 regno1 = REGNO (op1);
38937 gcc_assert (reload_completed);
38938 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38939 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38941 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38942 gcc_assert (nregs > 1);
38944 if (op2 && REG_P (op2))
38945 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38947 for (i = 0; i < nregs; i++)
38949 int offset = i * sub_size;
38950 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38951 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38952 rtx sub_op2 = ((code == NOT)
38953 ? NULL_RTX
38954 : simplify_subreg (sub_mode, op2, mode, offset));
38956 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38957 complement_final_p, complement_op1_p,
38958 complement_op2_p);
38961 return;
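/* For example (illustrative, not from the original source): a TImode AND
   on a 64-bit target is split here into two DImode ANDs on consecutive
   GPRs, and on a 32-bit target into four SImode ANDs; each piece is then
   handled by rs6000_split_logical_inner above.  */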
38965 /* Return true if the peephole2 can combine a load involving a combination of
38966 an addis instruction and a load with an offset that can be fused together on
38967 a power8. */
38969 bool
38970 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38971 rtx addis_value, /* addis value. */
38972 rtx target, /* target register that is loaded. */
38973 rtx mem) /* bottom part of the memory addr. */
38975 rtx addr;
38976 rtx base_reg;
38978 /* Validate arguments. */
38979 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38980 return false;
38982 if (!base_reg_operand (target, GET_MODE (target)))
38983 return false;
38985 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38986 return false;
38988 /* Allow sign/zero extension. */
38989 if (GET_CODE (mem) == ZERO_EXTEND
38990 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38991 mem = XEXP (mem, 0);
38993 if (!MEM_P (mem))
38994 return false;
38996 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38997 return false;
38999 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39000 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
39001 return false;
39003 /* Validate that the register used to load the high value is either the
39004 register being loaded, or we can safely replace its use.
39006 This function is only called from the peephole2 pass and we assume that
39007 there are 2 instructions in the peephole (addis and load), so we want to
39008 check that the target register is not used in the memory address and that
39009 the register holding the addis result is dead after the peephole. */
39010 if (REGNO (addis_reg) != REGNO (target))
39012 if (reg_mentioned_p (target, mem))
39013 return false;
39015 if (!peep2_reg_dead_p (2, addis_reg))
39016 return false;
39018 /* If the target register being loaded is the stack pointer, we must
39019 avoid loading any other value into it, even temporarily. */
39020 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
39021 return false;
39024 base_reg = XEXP (addr, 0);
39025 return REGNO (addis_reg) == REGNO (base_reg);
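/* A sketch of the two insns the peephole matches, assuming an ELF target
   and a hypothetical TOC-relative symbol .LC0:

       addis 9,2,.LC0@toc@ha     # addis_reg set from addis_value
       lwz   9,.LC0@toc@l(9)     # target loaded via addis_reg

   Power8 fuses such an addis/load pair when the load target is the same
   register as the base, which is what is checked above.  */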
39028 /* During the peephole2 pass, adjust and expand the insns for a load fusion
39029 sequence. We adjust the addis register to use the target register. If the
39030 load sign extends, we adjust the code to do the zero extending load, and an
39031 explicit sign extension later since the fusion only covers zero extending
39032 loads.
39034 The operands are:
39035 operands[0] register set with addis (to be replaced with target)
39036 operands[1] value set via addis
39037 operands[2] target register being loaded
39038 operands[3] D-form memory reference using operands[0]. */
39040 void
39041 expand_fusion_gpr_load (rtx *operands)
39043 rtx addis_value = operands[1];
39044 rtx target = operands[2];
39045 rtx orig_mem = operands[3];
39046 rtx new_addr, new_mem, orig_addr, offset;
39047 enum rtx_code plus_or_lo_sum;
39048 machine_mode target_mode = GET_MODE (target);
39049 machine_mode extend_mode = target_mode;
39050 machine_mode ptr_mode = Pmode;
39051 enum rtx_code extend = UNKNOWN;
39053 if (GET_CODE (orig_mem) == ZERO_EXTEND
39054 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
39056 extend = GET_CODE (orig_mem);
39057 orig_mem = XEXP (orig_mem, 0);
39058 target_mode = GET_MODE (orig_mem);
39061 gcc_assert (MEM_P (orig_mem));
39063 orig_addr = XEXP (orig_mem, 0);
39064 plus_or_lo_sum = GET_CODE (orig_addr);
39065 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39067 offset = XEXP (orig_addr, 1);
39068 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39069 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39071 if (extend != UNKNOWN)
39072 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
39074 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39075 UNSPEC_FUSION_GPR);
39076 emit_insn (gen_rtx_SET (target, new_mem));
39078 if (extend == SIGN_EXTEND)
39080 int sub_off = ((BYTES_BIG_ENDIAN)
39081 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
39082 : 0);
39083 rtx sign_reg
39084 = simplify_subreg (target_mode, target, extend_mode, sub_off);
39086 emit_insn (gen_rtx_SET (target,
39087 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
39090 return;
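/* For illustration (hypothetical symbol and register numbers): a
   sign-extending HImode load is rewritten into a fusable zero-extending
   load plus a separate sign extension, roughly

       addis 9,2,.LC0@toc@ha
       lhz   9,.LC0@toc@l(9)     # zero-extending form that fuses
       extsh 9,9                 # the explicit SIGN_EXTEND emitted above  */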
39093 /* Emit the addis instruction that will be part of a fused instruction
39094 sequence. */
39096 void
39097 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
39098 const char *mode_name)
39100 rtx fuse_ops[10];
39101 char insn_template[80];
39102 const char *addis_str = NULL;
39103 const char *comment_str = ASM_COMMENT_START;
39105 if (*comment_str == ' ')
39106 comment_str++;
39108 /* Emit the addis instruction. */
39109 fuse_ops[0] = target;
39110 if (satisfies_constraint_L (addis_value))
39112 fuse_ops[1] = addis_value;
39113 addis_str = "lis %0,%v1";
39116 else if (GET_CODE (addis_value) == PLUS)
39118 rtx op0 = XEXP (addis_value, 0);
39119 rtx op1 = XEXP (addis_value, 1);
39121 if (REG_P (op0) && CONST_INT_P (op1)
39122 && satisfies_constraint_L (op1))
39124 fuse_ops[1] = op0;
39125 fuse_ops[2] = op1;
39126 addis_str = "addis %0,%1,%v2";
39130 else if (GET_CODE (addis_value) == HIGH)
39132 rtx value = XEXP (addis_value, 0);
39133 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
39135 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
39136 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
39137 if (TARGET_ELF)
39138 addis_str = "addis %0,%2,%1@toc@ha";
39140 else if (TARGET_XCOFF)
39141 addis_str = "addis %0,%1@u(%2)";
39143 else
39144 gcc_unreachable ();
39147 else if (GET_CODE (value) == PLUS)
39149 rtx op0 = XEXP (value, 0);
39150 rtx op1 = XEXP (value, 1);
39152 if (GET_CODE (op0) == UNSPEC
39153 && XINT (op0, 1) == UNSPEC_TOCREL
39154 && CONST_INT_P (op1))
39156 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
39157 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
39158 fuse_ops[3] = op1;
39159 if (TARGET_ELF)
39160 addis_str = "addis %0,%2,%1+%3@toc@ha";
39162 else if (TARGET_XCOFF)
39163 addis_str = "addis %0,%1+%3@u(%2)";
39165 else
39166 gcc_unreachable ();
39170 else if (satisfies_constraint_L (value))
39172 fuse_ops[1] = value;
39173 addis_str = "lis %0,%v1";
39176 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
39178 fuse_ops[1] = value;
39179 addis_str = "lis %0,%1@ha";
39183 if (!addis_str)
39184 fatal_insn ("Could not generate addis value for fusion", addis_value);
39186 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
39187 comment, mode_name);
39188 output_asm_insn (insn_template, fuse_ops);
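/* Examples of the templates selected above (operand values are
   hypothetical, shown here for illustration only):

     L constant 0x12340000:     lis   9,0x1234
     PLUS reg + L constant:     addis 9,10,0x1234
     HIGH of a TOC symbol:      addis 9,2,sym@toc@ha      (ELF)
                                addis 9,sym@u(2)          (XCOFF)
     HIGH of TOC symbol + cst:  addis 9,2,sym+8@toc@ha    (ELF)  */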
39191 /* Emit a D-form load or store instruction that is the second instruction
39192 of a fusion sequence. */
39194 void
39195 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
39196 const char *insn_str)
39198 rtx fuse_ops[10];
39199 char insn_template[80];
39201 fuse_ops[0] = load_store_reg;
39202 fuse_ops[1] = addis_reg;
39204 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
39206 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
39207 fuse_ops[2] = offset;
39208 output_asm_insn (insn_template, fuse_ops);
39211 else if (GET_CODE (offset) == UNSPEC
39212 && XINT (offset, 1) == UNSPEC_TOCREL)
39214 if (TARGET_ELF)
39215 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
39217 else if (TARGET_XCOFF)
39218 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39220 else
39221 gcc_unreachable ();
39223 fuse_ops[2] = XVECEXP (offset, 0, 0);
39224 output_asm_insn (insn_template, fuse_ops);
39227 else if (GET_CODE (offset) == PLUS
39228 && GET_CODE (XEXP (offset, 0)) == UNSPEC
39229 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
39230 && CONST_INT_P (XEXP (offset, 1)))
39232 rtx tocrel_unspec = XEXP (offset, 0);
39233 if (TARGET_ELF)
39234 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
39236 else if (TARGET_XCOFF)
39237 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
39239 else
39240 gcc_unreachable ();
39242 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
39243 fuse_ops[3] = XEXP (offset, 1);
39244 output_asm_insn (insn_template, fuse_ops);
39247 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
39249 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39251 fuse_ops[2] = offset;
39252 output_asm_insn (insn_template, fuse_ops);
39255 else
39256 fatal_insn ("Unable to generate load/store offset for fusion", offset);
39258 return;
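/* Examples of the offset forms handled above (register numbers and
   symbols are hypothetical):

     CONST_INT offset:       lwz 9,32(9)
     UNSPEC_TOCREL:          lwz 9,sym@toc@l(9)     (ELF)
     UNSPEC_TOCREL + cst:    lwz 9,sym+8@toc@l(9)   (ELF)
     32-bit ELF constant:    lwz 9,sym@l(9)  */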
39261 /* Wrap a TOC address that can be fused to indicate that special fusion
39262 processing is needed. */
39264 static rtx
39265 fusion_wrap_memory_address (rtx old_mem)
39267 rtx old_addr = XEXP (old_mem, 0);
39268 rtvec v = gen_rtvec (1, old_addr);
39269 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
39270 return replace_equiv_address_nv (old_mem, new_addr, false);
39273 /* Given an address, convert it into the addis and load offset parts. Addresses
39274 created during the peephole2 process look like:
39275 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
39276 (unspec [(...)] UNSPEC_TOCREL))
39278 Addresses created via toc fusion look like:
39279 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */
39281 static void
39282 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
39284 rtx hi, lo;
39286 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
39288 lo = XVECEXP (addr, 0, 0);
39289 hi = gen_rtx_HIGH (Pmode, lo);
39291 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
39293 hi = XEXP (addr, 0);
39294 lo = XEXP (addr, 1);
39296 else
39297 gcc_unreachable ();
39299 *p_hi = hi;
39300 *p_lo = lo;
39303 /* Return a string to fuse an addis instruction with a gpr load to the same
39304 register that we loaded up the addis instruction. The address that is used
39305 is the logical address that was formed during peephole2:
39306 (lo_sum (high) (low-part))
39308 Or the address is the TOC address that is wrapped before register allocation:
39309 (unspec [(addr)] UNSPEC_FUSION_ADDIS)
39311 The code is complicated, so we call output_asm_insn directly, and just
39312 return "". */
39314 const char *
39315 emit_fusion_gpr_load (rtx target, rtx mem)
39317 rtx addis_value;
39318 rtx addr;
39319 rtx load_offset;
39320 const char *load_str = NULL;
39321 const char *mode_name = NULL;
39322 machine_mode mode;
39324 if (GET_CODE (mem) == ZERO_EXTEND)
39325 mem = XEXP (mem, 0);
39327 gcc_assert (REG_P (target) && MEM_P (mem));
39329 addr = XEXP (mem, 0);
39330 fusion_split_address (addr, &addis_value, &load_offset);
39332 /* Now emit the load instruction to the same register. */
39333 mode = GET_MODE (mem);
39334 switch (mode)
39336 case QImode:
39337 mode_name = "char";
39338 load_str = "lbz";
39339 break;
39341 case HImode:
39342 mode_name = "short";
39343 load_str = "lhz";
39344 break;
39346 case SImode:
39347 case SFmode:
39348 mode_name = (mode == SFmode) ? "float" : "int";
39349 load_str = "lwz";
39350 break;
39352 case DImode:
39353 case DFmode:
39354 gcc_assert (TARGET_POWERPC64);
39355 mode_name = (mode == DFmode) ? "double" : "long";
39356 load_str = "ld";
39357 break;
39359 default:
39360 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
39363 /* Emit the addis instruction. */
39364 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
39366 /* Emit the D-form load instruction. */
39367 emit_fusion_load_store (target, target, load_offset, load_str);
39369 return "";
39373 /* Return true if the peephole2 can combine a load/store involving a
39374 combination of an addis instruction and the memory operation. This was
39375 added to the ISA 3.0 (power9) hardware. */
39377 bool
39378 fusion_p9_p (rtx addis_reg, /* register set via addis. */
39379 rtx addis_value, /* addis value. */
39380 rtx dest, /* destination (memory or register). */
39381 rtx src) /* source (register or memory). */
39383 rtx addr, mem, offset;
39384 enum machine_mode mode = GET_MODE (src);
39386 /* Validate arguments. */
39387 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
39388 return false;
39390 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
39391 return false;
39393 /* Ignore extend operations that are part of the load. */
39394 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
39395 src = XEXP (src, 0);
39397 /* Test for memory<-register or register<-memory. */
39398 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
39400 if (!MEM_P (dest))
39401 return false;
39403 mem = dest;
39406 else if (MEM_P (src))
39408 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
39409 return false;
39411 mem = src;
39414 else
39415 return false;
39417 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39418 if (GET_CODE (addr) == PLUS)
39420 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39421 return false;
39423 return satisfies_constraint_I (XEXP (addr, 1));
39426 else if (GET_CODE (addr) == LO_SUM)
39428 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39429 return false;
39431 offset = XEXP (addr, 1);
39432 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
39433 return small_toc_ref (offset, GET_MODE (offset));
39435 else if (TARGET_ELF && !TARGET_POWERPC64)
39436 return CONSTANT_P (offset);
39439 return false;
39442 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39443 load sequence.
39445 The operands are:
39446 operands[0] register set with addis
39447 operands[1] value set via addis
39448 operands[2] target register being loaded
39449 operands[3] D-form memory reference using operands[0].
39451 This is similar to the fusion introduced with power8, except it applies to
39452 both loads and stores and does not require the result register to be the
39453 same as the base register. At the moment, we only do this if the register
39454 set with addis is dead.
39456 void
39457 expand_fusion_p9_load (rtx *operands)
39459 rtx tmp_reg = operands[0];
39460 rtx addis_value = operands[1];
39461 rtx target = operands[2];
39462 rtx orig_mem = operands[3];
39463 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
39464 enum rtx_code plus_or_lo_sum;
39465 machine_mode target_mode = GET_MODE (target);
39466 machine_mode extend_mode = target_mode;
39467 machine_mode ptr_mode = Pmode;
39468 enum rtx_code extend = UNKNOWN;
39470 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
39472 extend = GET_CODE (orig_mem);
39473 orig_mem = XEXP (orig_mem, 0);
39474 target_mode = GET_MODE (orig_mem);
39477 gcc_assert (MEM_P (orig_mem));
39479 orig_addr = XEXP (orig_mem, 0);
39480 plus_or_lo_sum = GET_CODE (orig_addr);
39481 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39483 offset = XEXP (orig_addr, 1);
39484 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39485 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39487 if (extend != UNKNOWN)
39488 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
39490 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39491 UNSPEC_FUSION_P9);
39493 set = gen_rtx_SET (target, new_mem);
39494 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39495 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39496 emit_insn (insn);
39498 return;
39501 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39502 store sequence.
39504 The operands are:
39505 operands[0] register set with addis
39506 operands[1] value set via addis
39507 operands[2] target D-form memory being stored to
39508 operands[3] register being stored
39510 This is similar to the fusion introduced with power8, except it applies to
39511 both loads and stores and does not require the result register to be the
39512 same as the base register. At the moment, we only do this if the register
39513 set with addis is dead.
39515 void
39516 expand_fusion_p9_store (rtx *operands)
39518 rtx tmp_reg = operands[0];
39519 rtx addis_value = operands[1];
39520 rtx orig_mem = operands[2];
39521 rtx src = operands[3];
39522 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39523 enum rtx_code plus_or_lo_sum;
39524 machine_mode target_mode = GET_MODE (orig_mem);
39525 machine_mode ptr_mode = Pmode;
39527 gcc_assert (MEM_P (orig_mem));
39529 orig_addr = XEXP (orig_mem, 0);
39530 plus_or_lo_sum = GET_CODE (orig_addr);
39531 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39533 offset = XEXP (orig_addr, 1);
39534 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39535 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39537 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39538 UNSPEC_FUSION_P9);
39540 set = gen_rtx_SET (new_mem, new_src);
39541 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39542 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39543 emit_insn (insn);
39545 return;
39548 /* Return a string to fuse an addis instruction with a load using extended
39549 fusion. The address that is used is the logical address that was formed
39550 during peephole2: (lo_sum (high) (low-part))
39552 The code is complicated, so we call output_asm_insn directly, and just
39553 return "". */
39555 const char *
39556 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39558 enum machine_mode mode = GET_MODE (reg);
39559 rtx hi;
39560 rtx lo;
39561 rtx addr;
39562 const char *load_string;
39563 int r;
39565 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39567 mem = XEXP (mem, 0);
39568 mode = GET_MODE (mem);
39571 if (GET_CODE (reg) == SUBREG)
39573 gcc_assert (SUBREG_BYTE (reg) == 0);
39574 reg = SUBREG_REG (reg);
39577 if (!REG_P (reg))
39578 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39580 r = REGNO (reg);
39581 if (FP_REGNO_P (r))
39583 if (mode == SFmode)
39584 load_string = "lfs";
39585 else if (mode == DFmode || mode == DImode)
39586 load_string = "lfd";
39587 else
39588 gcc_unreachable ();
39590 else if (INT_REGNO_P (r))
39592 switch (mode)
39594 case QImode:
39595 load_string = "lbz";
39596 break;
39597 case HImode:
39598 load_string = "lhz";
39599 break;
39600 case SImode:
39601 case SFmode:
39602 load_string = "lwz";
39603 break;
39604 case DImode:
39605 case DFmode:
39606 if (!TARGET_POWERPC64)
39607 gcc_unreachable ();
39608 load_string = "ld";
39609 break;
39610 default:
39611 gcc_unreachable ();
39614 else
39615 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39617 if (!MEM_P (mem))
39618 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39620 addr = XEXP (mem, 0);
39621 fusion_split_address (addr, &hi, &lo);
39623 /* Emit the addis instruction. */
39624 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
39626 /* Emit the D-form load instruction. */
39627 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39629 return "";
39632 /* Return a string to fuse an addis instruction with a store using extended
39633 fusion. The address that is used is the logical address that was formed
39634 during peephole2: (lo_sum (high) (low-part))
39636 The code is complicated, so we call output_asm_insn directly, and just
39637 return "". */
39639 const char *
39640 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39642 enum machine_mode mode = GET_MODE (reg);
39643 rtx hi;
39644 rtx lo;
39645 rtx addr;
39646 const char *store_string;
39647 int r;
39649 if (GET_CODE (reg) == SUBREG)
39651 gcc_assert (SUBREG_BYTE (reg) == 0);
39652 reg = SUBREG_REG (reg);
39655 if (!REG_P (reg))
39656 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39658 r = REGNO (reg);
39659 if (FP_REGNO_P (r))
39661 if (mode == SFmode)
39662 store_string = "stfs";
39663 else if (mode == DFmode)
39664 store_string = "stfd";
39665 else
39666 gcc_unreachable ();
39668 else if (INT_REGNO_P (r))
39670 switch (mode)
39672 case QImode:
39673 store_string = "stb";
39674 break;
39675 case HImode:
39676 store_string = "sth";
39677 break;
39678 case SImode:
39679 case SFmode:
39680 store_string = "stw";
39681 break;
39682 case DImode:
39683 case DFmode:
39684 if (!TARGET_POWERPC64)
39685 gcc_unreachable ();
39686 store_string = "std";
39687 break;
39688 default:
39689 gcc_unreachable ();
39692 else
39693 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39695 if (!MEM_P (mem))
39696 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39698 addr = XEXP (mem, 0);
39699 fusion_split_address (addr, &hi, &lo);
39701 /* Emit the addis instruction. */
39702 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39704 /* Emit the D-form store instruction. */
39705 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39707 return "";
39711 /* Analyze vector computations and remove unnecessary doubleword
39712 swaps (xxswapdi instructions). This pass is performed only
39713 for little-endian VSX code generation.
39715 For this specific case, loads and stores of 4x32 and 2x64 vectors
39716 are inefficient. These are implemented using the lxvd2x and
39717 stxvd2x instructions, which invert the order of doublewords in
39718 a vector register. Thus the code generation inserts an xxswapdi
39719 after each such load, and prior to each such store. (For spill
39720 code after register assignment, an additional xxswapdi is inserted
39721 following each store in order to return a hard register to its
39722 unpermuted value.)
39724 The extra xxswapdi instructions reduce performance. This can be
39725 particularly bad for vectorized code. The purpose of this pass
39726 is to reduce the number of xxswapdi instructions required for
39727 correctness.
39729 The primary insight is that much code that operates on vectors
39730 does not care about the relative order of elements in a register,
39731 so long as the correct memory order is preserved. If we have
39732 a computation where all input values are provided by lxvd2x/xxswapdi
39733 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
39734 and all intermediate computations are pure SIMD (independent of
39735 element order), then all the xxswapdi's associated with the loads
39736 and stores may be removed.
39738 This pass uses some of the infrastructure and logical ideas from
39739 the "web" pass in web.c. We create maximal webs of computations
39740 fitting the description above using union-find. Each such web is
39741 then optimized by removing its unnecessary xxswapdi instructions.
39743 The pass is placed prior to global optimization so that we can
39744 perform the optimization in the safest and simplest way possible;
39745 that is, by replacing each xxswapdi insn with a register copy insn.
39746 Subsequent forward propagation will remove copies where possible.
39748 There are some operations sensitive to element order for which we
39749 can still allow the operation, provided we modify those operations.
39750 These include CONST_VECTORs, for which we must swap the first and
39751 second halves of the constant vector; and SUBREGs, for which we
39752 must adjust the byte offset to account for the swapped doublewords.
39753 A remaining opportunity would be non-immediate-form splats, for
39754 which we should adjust the selected lane of the input. We should
39755 also make code generation adjustments for sum-across operations,
39756 since this is a common vectorizer reduction.
39758 Because we run prior to the first split, we can see loads and stores
39759 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39760 vector loads and stores that have not yet been split into a permuting
39761 load/store and a swap. (One way this can happen is with a builtin
39762 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39763 than deleting a swap, we convert the load/store into a permuting
39764 load/store (which effectively removes the swap). */
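/* A sketch of the little-endian pattern this pass targets (register
   numbers are illustrative):

       lxvd2x  0,0,9       # load; doublewords arrive reversed
       xxswapd 0,0         # restore element order
       ...                 # order-insensitive SIMD computation
       xxswapd 0,0         # reverse again before the store
       stxvd2x 0,0,10      # store

   When an entire web is order-insensitive, both xxswapd insns are
   replaced with register copies and later cleaned up by forward
   propagation.  */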
39766 /* Notes on Permutes
39768 We do not currently handle computations that contain permutes. There
39769 is a general transformation that can be performed correctly, but it
39770 may introduce more expensive code than it replaces. To handle these
39771 would require a cost model to determine when to perform the optimization.
39772 This commentary records how this could be done if desired.
39774 The most general permute is something like this (example for V16QI):
39776 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39777 (parallel [(const_int a0) (const_int a1)
39779 (const_int a14) (const_int a15)]))
39781 where a0,...,a15 are in [0,31] and select elements from op1 and op2
39782 to produce in the result.
39784 Regardless of mode, we can convert the PARALLEL to a mask of 16
39785 byte-element selectors. Let's call this M, with M[i] representing
39786 the ith byte-element selector value. Then if we swap doublewords
39787 throughout the computation, we can get correct behavior by replacing
39788 M with M' as follows:
39790 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39791 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39793 This seems promising at first, since we are just replacing one mask
39794 with another. But certain masks are preferable to others. If M
39795 is a mask that matches a vmrghh pattern, for example, M' certainly
39796 will not. Instead of a single vmrghh, we would generate a load of
39797 M' and a vperm. So we would need to know how many xxswapd's we can
39798 remove as a result of this transformation to determine if it's
39799 profitable; and preferably the logic would need to be aware of all
39800 the special preferable masks.
39802 Another form of permute is an UNSPEC_VPERM, in which the mask is
39803 already in a register. In some cases, this mask may be a constant
39804 that we can discover with ud-chains, in which case the above
39805 transformation is ok. However, the common usage here is for the
39806 mask to be produced by an UNSPEC_LVSL, in which case the mask
39807 cannot be known at compile time. In such a case we would have to
39808 generate several instructions to compute M' as above at run time,
39809 and a cost model is needed again.
39811 However, when the mask M for an UNSPEC_VPERM is loaded from the
39812 constant pool, we can replace M with M' as above at no cost
39813 beyond adding a constant pool entry. */
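/* A standalone C sketch (illustrative only) of the M -> M' remapping
   described above for a 16-byte permute control vector m[]:

     unsigned char mprime[16];
     for (int i = 0; i < 16; i++)
       {
         unsigned char elt = m[i] & 15;   // element within op1/op2
         unsigned char vec = m[i] & 16;   // which input vector
         mprime[i] = ((elt + 8) & 15) | vec;
       }
*/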
39815 /* This is based on the union-find logic in web.c. web_entry_base is
39816 defined in df.h. */
39817 class swap_web_entry : public web_entry_base
39819 public:
39820 /* Pointer to the insn. */
39821 rtx_insn *insn;
39822 /* Set if insn contains a mention of a vector register. All other
39823 fields are undefined if this field is unset. */
39824 unsigned int is_relevant : 1;
39825 /* Set if insn is a load. */
39826 unsigned int is_load : 1;
39827 /* Set if insn is a store. */
39828 unsigned int is_store : 1;
39829 /* Set if insn is a doubleword swap. This can either be a register swap
39830 or a permuting load or store (test is_load and is_store for this). */
39831 unsigned int is_swap : 1;
39832 /* Set if the insn has a live-in use of a parameter register. */
39833 unsigned int is_live_in : 1;
39834 /* Set if the insn has a live-out def of a return register. */
39835 unsigned int is_live_out : 1;
39836 /* Set if the insn contains a subreg reference of a vector register. */
39837 unsigned int contains_subreg : 1;
39838 /* Set if the insn contains a 128-bit integer operand. */
39839 unsigned int is_128_int : 1;
39840 /* Set if this is a call-insn. */
39841 unsigned int is_call : 1;
39842 /* Set if this insn does not perform a vector operation for which
39843 element order matters, or if we know how to fix it up if it does.
39844 Undefined if is_swap is set. */
39845 unsigned int is_swappable : 1;
39846 /* A nonzero value indicates what kind of special handling for this
39847 insn is required if doublewords are swapped. Undefined if
39848 is_swappable is not set. */
39849 unsigned int special_handling : 4;
39850 /* Set if the web represented by this entry cannot be optimized. */
39851 unsigned int web_not_optimizable : 1;
39852 /* Set if this insn should be deleted. */
39853 unsigned int will_delete : 1;
39856 enum special_handling_values {
39857 SH_NONE = 0,
39858 SH_CONST_VECTOR,
39859 SH_SUBREG,
39860 SH_NOSWAP_LD,
39861 SH_NOSWAP_ST,
39862 SH_EXTRACT,
39863 SH_SPLAT,
39864 SH_XXPERMDI,
39865 SH_CONCAT,
39866 SH_VPERM
39869 /* Union INSN with all insns containing definitions that reach USE.
39870 Detect whether USE is live-in to the current function. */
39871 static void
39872 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
39874 struct df_link *link = DF_REF_CHAIN (use);
39876 if (!link)
39877 insn_entry[INSN_UID (insn)].is_live_in = 1;
39879 while (link)
39881 if (DF_REF_IS_ARTIFICIAL (link->ref))
39882 insn_entry[INSN_UID (insn)].is_live_in = 1;
39884 if (DF_REF_INSN_INFO (link->ref))
39886 rtx def_insn = DF_REF_INSN (link->ref);
39887 (void)unionfind_union (insn_entry + INSN_UID (insn),
39888 insn_entry + INSN_UID (def_insn));
39891 link = link->next;
39895 /* Union INSN with all insns containing uses reached from DEF.
39896 Detect whether DEF is live-out from the current function. */
39897 static void
39898 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
39900 struct df_link *link = DF_REF_CHAIN (def);
39902 if (!link)
39903 insn_entry[INSN_UID (insn)].is_live_out = 1;
39905 while (link)
39907 /* This could be an eh use or some other artificial use;
39908 we treat these all the same (killing the optimization). */
39909 if (DF_REF_IS_ARTIFICIAL (link->ref))
39910 insn_entry[INSN_UID (insn)].is_live_out = 1;
39912 if (DF_REF_INSN_INFO (link->ref))
39914 rtx use_insn = DF_REF_INSN (link->ref);
39915 (void)unionfind_union (insn_entry + INSN_UID (insn),
39916 insn_entry + INSN_UID (use_insn));
39919 link = link->next;
39923 /* Return 1 iff INSN is a load insn, including permuting loads that
39924 represent an lxvd2x instruction; else return 0. */
39925 static unsigned int
39926 insn_is_load_p (rtx insn)
39928 rtx body = PATTERN (insn);
39930 if (GET_CODE (body) == SET)
39932 if (GET_CODE (SET_SRC (body)) == MEM)
39933 return 1;
39935 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
39936 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
39937 return 1;
39939 return 0;
39942 if (GET_CODE (body) != PARALLEL)
39943 return 0;
39945 rtx set = XVECEXP (body, 0, 0);
39947 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
39948 return 1;
39950 return 0;
39953 /* Return 1 iff INSN is a store insn, including permuting stores that
39954 represent an stxvd2x instruction; else return 0. */
39955 static unsigned int
39956 insn_is_store_p (rtx insn)
39958 rtx body = PATTERN (insn);
39959 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
39960 return 1;
39961 if (GET_CODE (body) != PARALLEL)
39962 return 0;
39963 rtx set = XVECEXP (body, 0, 0);
39964 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
39965 return 1;
39966 return 0;
39969 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39970 a permuting load, or a permuting store. */
39971 static unsigned int
39972 insn_is_swap_p (rtx insn)
39974 rtx body = PATTERN (insn);
39975 if (GET_CODE (body) != SET)
39976 return 0;
39977 rtx rhs = SET_SRC (body);
39978 if (GET_CODE (rhs) != VEC_SELECT)
39979 return 0;
39980 rtx parallel = XEXP (rhs, 1);
39981 if (GET_CODE (parallel) != PARALLEL)
39982 return 0;
39983 unsigned int len = XVECLEN (parallel, 0);
39984 if (len != 2 && len != 4 && len != 8 && len != 16)
39985 return 0;
39986 for (unsigned int i = 0; i < len / 2; ++i)
39988 rtx op = XVECEXP (parallel, 0, i);
39989 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
39990 return 0;
39992 for (unsigned int i = len / 2; i < len; ++i)
39994 rtx op = XVECEXP (parallel, 0, i);
39995 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
39996 return 0;
39998 return 1;
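/* The selector accepted above is exactly the one that exchanges the two
   doublewords: [1 0] for V2DI, [2 3 0 1] for V4SI, [8..15 0..7] for
   V16QI. Equivalently, as a check on a plain integer array sel[]:

     for (i = 0; i < len / 2; i++)  ok &= sel[i] == len / 2 + i;
     for (     ; i < len;     i++)  ok &= sel[i] == i - len / 2;
*/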
40001 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
40002 static bool
40003 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
40005 unsigned uid = INSN_UID (insn);
40006 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
40007 return false;
40009 /* Find the unique use in the swap and locate its def. If the def
40010 isn't unique, punt. */
40011 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40012 df_ref use;
40013 FOR_EACH_INSN_INFO_USE (use, insn_info)
40015 struct df_link *def_link = DF_REF_CHAIN (use);
40016 if (!def_link || def_link->next)
40017 return false;
40019 rtx def_insn = DF_REF_INSN (def_link->ref);
40020 unsigned uid2 = INSN_UID (def_insn);
40021 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
40022 return false;
40024 rtx body = PATTERN (def_insn);
40025 if (GET_CODE (body) != SET
40026 || GET_CODE (SET_SRC (body)) != VEC_SELECT
40027 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
40028 return false;
40030 rtx mem = XEXP (SET_SRC (body), 0);
40031 rtx base_reg = XEXP (mem, 0);
40033 df_ref base_use;
40034 insn_info = DF_INSN_INFO_GET (def_insn);
40035 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40037 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40038 continue;
40040 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40041 if (!base_def_link || base_def_link->next)
40042 return false;
40044 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
40045 rtx tocrel_body = PATTERN (tocrel_insn);
40046 rtx base, offset;
40047 if (GET_CODE (tocrel_body) != SET)
40048 return false;
40049 /* There is an extra level of indirection for small/large
40050 code models. */
40051 rtx tocrel_expr = SET_SRC (tocrel_body);
40052 if (GET_CODE (tocrel_expr) == MEM)
40053 tocrel_expr = XEXP (tocrel_expr, 0);
40054 if (!toc_relative_expr_p (tocrel_expr, false))
40055 return false;
40056 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40057 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
40058 return false;
40061 return true;
40064 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
40065 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
40066 static bool
40067 v2df_reduction_p (rtx op)
40069 if (GET_MODE (op) != V2DFmode)
40070 return false;
40072 enum rtx_code code = GET_CODE (op);
40073 if (code != PLUS && code != SMIN && code != SMAX)
40074 return false;
40076 rtx concat = XEXP (op, 0);
40077 if (GET_CODE (concat) != VEC_CONCAT)
40078 return false;
40080 rtx select0 = XEXP (concat, 0);
40081 rtx select1 = XEXP (concat, 1);
40082 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
40083 return false;
40085 rtx reg0 = XEXP (select0, 0);
40086 rtx reg1 = XEXP (select1, 0);
40087 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
40088 return false;
40090 rtx parallel0 = XEXP (select0, 1);
40091 rtx parallel1 = XEXP (select1, 1);
40092 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
40093 return false;
40095 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
40096 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
40097 return false;
40099 return true;
40102 /* Return 1 iff OP is an operand that will not be affected by having
40103 vector doublewords swapped in memory. */
40104 static unsigned int
40105 rtx_is_swappable_p (rtx op, unsigned int *special)
40107 enum rtx_code code = GET_CODE (op);
40108 int i, j;
40109 rtx parallel;
40111 switch (code)
40113 case LABEL_REF:
40114 case SYMBOL_REF:
40115 case CLOBBER:
40116 case REG:
40117 return 1;
40119 case VEC_CONCAT:
40120 case ASM_INPUT:
40121 case ASM_OPERANDS:
40122 return 0;
40124 case CONST_VECTOR:
40126 *special = SH_CONST_VECTOR;
40127 return 1;
40130 case VEC_DUPLICATE:
40131 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
40132 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
40133 it represents a vector splat for which we can do special
40134 handling. */
40135 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
40136 return 1;
40137 else if (REG_P (XEXP (op, 0))
40138 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40139 /* This catches V2DF and V2DI splat, at a minimum. */
40140 return 1;
40141 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
40142 && REG_P (XEXP (XEXP (op, 0), 0))
40143 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40144 /* This catches splat of a truncated value. */
40145 return 1;
40146 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
40147 /* If the duplicated item is from a select, defer to the select
40148 processing to see if we can change the lane for the splat. */
40149 return rtx_is_swappable_p (XEXP (op, 0), special);
40150 else
40151 return 0;
40153 case VEC_SELECT:
40154 /* A vec_extract operation is ok if we change the lane. */
40155 if (GET_CODE (XEXP (op, 0)) == REG
40156 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
40157 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40158 && XVECLEN (parallel, 0) == 1
40159 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
40161 *special = SH_EXTRACT;
40162 return 1;
40164 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
40165 XXPERMDI is a swap operation, it will be identified by
40166 insn_is_swap_p and therefore we won't get here. */
40167 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
40168 && (GET_MODE (XEXP (op, 0)) == V4DFmode
40169 || GET_MODE (XEXP (op, 0)) == V4DImode)
40170 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40171 && XVECLEN (parallel, 0) == 2
40172 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
40173 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
40175 *special = SH_XXPERMDI;
40176 return 1;
40178 else if (v2df_reduction_p (op))
40179 return 1;
40180 else
40181 return 0;
40183 case UNSPEC:
40185 /* Various operations are unsafe for this optimization, at least
40186 without significant additional work. Permutes are obviously
40187 problematic, as both the permute control vector and the ordering
40188 of the target values are invalidated by doubleword swapping.
40189 Vector pack and unpack modify the number of vector lanes.
40190 Merge-high/low will not operate correctly on swapped operands.
40191 Vector shifts across element boundaries are clearly uncool,
40192 as are vector select and concatenate operations. Vector
40193 sum-across instructions define one operand with a specific
40194 order-dependent element, so additional fixup code would be
40195 needed to make those work. Vector set and non-immediate-form
40196 vector splat are element-order sensitive. A few of these
40197 cases might be workable with special handling if required.
40198 Adding cost modeling would be appropriate in some cases. */
40199 int val = XINT (op, 1);
40200 switch (val)
40202 default:
40203 break;
40204 case UNSPEC_VMRGH_DIRECT:
40205 case UNSPEC_VMRGL_DIRECT:
40206 case UNSPEC_VPACK_SIGN_SIGN_SAT:
40207 case UNSPEC_VPACK_SIGN_UNS_SAT:
40208 case UNSPEC_VPACK_UNS_UNS_MOD:
40209 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
40210 case UNSPEC_VPACK_UNS_UNS_SAT:
40211 case UNSPEC_VPERM:
40212 case UNSPEC_VPERM_UNS:
40213 case UNSPEC_VPERMHI:
40214 case UNSPEC_VPERMSI:
40215 case UNSPEC_VPKPX:
40216 case UNSPEC_VSLDOI:
40217 case UNSPEC_VSLO:
40218 case UNSPEC_VSRO:
40219 case UNSPEC_VSUM2SWS:
40220 case UNSPEC_VSUM4S:
40221 case UNSPEC_VSUM4UBS:
40222 case UNSPEC_VSUMSWS:
40223 case UNSPEC_VSUMSWS_DIRECT:
40224 case UNSPEC_VSX_CONCAT:
40225 case UNSPEC_VSX_SET:
40226 case UNSPEC_VSX_SLDWI:
40227 case UNSPEC_VUNPACK_HI_SIGN:
40228 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
40229 case UNSPEC_VUNPACK_LO_SIGN:
40230 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
40231 case UNSPEC_VUPKHPX:
40232 case UNSPEC_VUPKHS_V4SF:
40233 case UNSPEC_VUPKHU_V4SF:
40234 case UNSPEC_VUPKLPX:
40235 case UNSPEC_VUPKLS_V4SF:
40236 case UNSPEC_VUPKLU_V4SF:
40237 case UNSPEC_VSX_CVDPSPN:
40238 case UNSPEC_VSX_CVSPDP:
40239 case UNSPEC_VSX_CVSPDPN:
40240 case UNSPEC_VSX_EXTRACT:
40241 case UNSPEC_VSX_VSLO:
40242 case UNSPEC_VSX_VEC_INIT:
40243 return 0;
40244 case UNSPEC_VSPLT_DIRECT:
40245 *special = SH_SPLAT;
40246 return 1;
40247 case UNSPEC_REDUC_PLUS:
40248 case UNSPEC_REDUC:
40249 return 1;
40253 default:
40254 break;
40257 const char *fmt = GET_RTX_FORMAT (code);
40258 int ok = 1;
40260 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40261 if (fmt[i] == 'e' || fmt[i] == 'u')
40263 unsigned int special_op = SH_NONE;
40264 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
40265 if (special_op == SH_NONE)
40266 continue;
40267 /* Ensure we never have two kinds of special handling
40268 for the same insn. */
40269 if (*special != SH_NONE && *special != special_op)
40270 return 0;
40271 *special = special_op;
40273 else if (fmt[i] == 'E')
40274 for (j = 0; j < XVECLEN (op, i); ++j)
40276 unsigned int special_op = SH_NONE;
40277 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
40278 if (special_op == SH_NONE)
40279 continue;
40280 /* Ensure we never have two kinds of special handling
40281 for the same insn. */
40282 if (*special != SH_NONE && *special != special_op)
40283 return 0;
40284 *special = special_op;
40287 return ok;
40290 /* Return 1 iff INSN is an operand that will not be affected by
40291 having vector doublewords swapped in memory (in which case
40292 *SPECIAL is unchanged), or that can be modified to be correct
40293 if vector doublewords are swapped in memory (in which case
40294 *SPECIAL is changed to a value indicating how). */
40295 static unsigned int
40296 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
40297 unsigned int *special)
40299 /* Calls are always bad. */
40300 if (GET_CODE (insn) == CALL_INSN)
40301 return 0;
40303 /* Loads and stores seen here are not permuting, but we can still
40304 fix them up by converting them to permuting ones. Exceptions:
40305 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
40306 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
40307 for the SET source. Also we must now make an exception for lvx
40308 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
40309 explicit "& -16") since this leads to unrecognizable insns. */
40310 rtx body = PATTERN (insn);
40311 int i = INSN_UID (insn);
40313 if (insn_entry[i].is_load)
40315 if (GET_CODE (body) == SET)
40317 rtx rhs = SET_SRC (body);
40318 gcc_assert (GET_CODE (rhs) == MEM);
40319 if (GET_CODE (XEXP (rhs, 0)) == AND)
40320 return 0;
40322 *special = SH_NOSWAP_LD;
40323 return 1;
40325 else
40326 return 0;
40329 if (insn_entry[i].is_store)
40331 if (GET_CODE (body) == SET
40332 && GET_CODE (SET_SRC (body)) != UNSPEC)
40334 rtx lhs = SET_DEST (body);
40335 gcc_assert (GET_CODE (lhs) == MEM);
40336 if (GET_CODE (XEXP (lhs, 0)) == AND)
40337 return 0;
40339 *special = SH_NOSWAP_ST;
40340 return 1;
40342 else
40343 return 0;
40346 /* A convert to single precision can be left as is provided that
40347 all of its uses are in xxspltw instructions that splat BE element
40348 zero. */
40349 if (GET_CODE (body) == SET
40350 && GET_CODE (SET_SRC (body)) == UNSPEC
40351 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
40353 df_ref def;
40354 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40356 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40358 struct df_link *link = DF_REF_CHAIN (def);
40359 if (!link)
40360 return 0;
40362 for (; link; link = link->next) {
40363 rtx use_insn = DF_REF_INSN (link->ref);
40364 rtx use_body = PATTERN (use_insn);
40365 if (GET_CODE (use_body) != SET
40366 || GET_CODE (SET_SRC (use_body)) != UNSPEC
40367 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
40368 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
40369 return 0;
40373 return 1;
40376 /* A concatenation of two doublewords is ok if we reverse the
40377 order of the inputs. */
40378 if (GET_CODE (body) == SET
40379 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
40380 && (GET_MODE (SET_SRC (body)) == V2DFmode
40381 || GET_MODE (SET_SRC (body)) == V2DImode))
40383 *special = SH_CONCAT;
40384 return 1;
40387 /* V2DF reductions are always swappable. */
40388 if (GET_CODE (body) == PARALLEL)
40390 rtx expr = XVECEXP (body, 0, 0);
40391 if (GET_CODE (expr) == SET
40392 && v2df_reduction_p (SET_SRC (expr)))
40393 return 1;
40396 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
40397 constant pool. */
40398 if (GET_CODE (body) == SET
40399 && GET_CODE (SET_SRC (body)) == UNSPEC
40400 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
40401 && XVECLEN (SET_SRC (body), 0) == 3
40402 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
40404 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
40405 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40406 df_ref use;
40407 FOR_EACH_INSN_INFO_USE (use, insn_info)
40408 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40410 struct df_link *def_link = DF_REF_CHAIN (use);
40411 /* Punt if multiple definitions for this reg. */
40412 if (def_link && !def_link->next &&
40413 const_load_sequence_p (insn_entry,
40414 DF_REF_INSN (def_link->ref)))
40416 *special = SH_VPERM;
40417 return 1;
40422 /* Otherwise check the operands for vector lane violations. */
40423 return rtx_is_swappable_p (body, special);
40426 enum chain_purpose { FOR_LOADS, FOR_STORES };
40428 /* Return true if the UD or DU chain headed by LINK is non-empty,
40429 and every entry on the chain references an insn that is a
40430 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
40431 register swap must have only permuting loads as reaching defs.
40432 If PURPOSE is FOR_STORES, each such register swap must have only
40433 register swaps or permuting stores as reached uses. */
40434 static bool
40435 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
40436 enum chain_purpose purpose)
40438 if (!link)
40439 return false;
40441 for (; link; link = link->next)
40443 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
40444 continue;
40446 if (DF_REF_IS_ARTIFICIAL (link->ref))
40447 return false;
40449 rtx reached_insn = DF_REF_INSN (link->ref);
40450 unsigned uid = INSN_UID (reached_insn);
40451 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
40453 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
40454 || insn_entry[uid].is_store)
40455 return false;
40457 if (purpose == FOR_LOADS)
40459 df_ref use;
40460 FOR_EACH_INSN_INFO_USE (use, insn_info)
40462 struct df_link *swap_link = DF_REF_CHAIN (use);
40464 while (swap_link)
40466 if (DF_REF_IS_ARTIFICIAL (link->ref))
40467 return false;
40469 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
40470 unsigned uid2 = INSN_UID (swap_def_insn);
40472 /* Only permuting loads are allowed. */
40473 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
40474 return false;
40476 swap_link = swap_link->next;
40480 else if (purpose == FOR_STORES)
40482 df_ref def;
40483 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40485 struct df_link *swap_link = DF_REF_CHAIN (def);
40487 while (swap_link)
40489 if (DF_REF_IS_ARTIFICIAL (link->ref))
40490 return false;
40492 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
40493 unsigned uid2 = INSN_UID (swap_use_insn);
40495 /* Permuting stores or register swaps are allowed. */
40496 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
40497 return false;
40499 swap_link = swap_link->next;
40505 return true;
40508 /* Mark the xxswapdi instructions associated with permuting loads and
40509 stores for removal. Note that we only flag them for deletion here,
40510 as there is a possibility of a swap being reached from multiple
40511 loads, etc. */
40512 static void
40513 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
40514 {
40515   rtx insn = insn_entry[i].insn;
40516   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40517 
40518   if (insn_entry[i].is_load)
40519     {
40520       df_ref def;
40521       FOR_EACH_INSN_INFO_DEF (def, insn_info)
40522         {
40523           struct df_link *link = DF_REF_CHAIN (def);
40524 
40525           /* We know by now that these are swaps, so we can delete
40526              them confidently.  */
40527           while (link)
40528             {
40529               rtx use_insn = DF_REF_INSN (link->ref);
40530               insn_entry[INSN_UID (use_insn)].will_delete = 1;
40531               link = link->next;
40532             }
40533         }
40534     }
40535   else if (insn_entry[i].is_store)
40536     {
40537       df_ref use;
40538       FOR_EACH_INSN_INFO_USE (use, insn_info)
40539         {
40540           /* Ignore uses for addressability.  */
40541           machine_mode mode = GET_MODE (DF_REF_REG (use));
40542           if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
40543             continue;
40544 
40545           struct df_link *link = DF_REF_CHAIN (use);
40546 
40547           /* We know by now that these are swaps, so we can delete
40548              them confidently.  */
40549           while (link)
40550             {
40551               rtx def_insn = DF_REF_INSN (link->ref);
40552               insn_entry[INSN_UID (def_insn)].will_delete = 1;
40553               link = link->next;
40554             }
40555         }
40556     }
40557 }
40558 
40559 /* OP is either a CONST_VECTOR or an expression containing one.
40560 Swap the first half of the vector with the second in the first
40561 case. Recurse to find it in the second. */
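/* As an illustration: for a V4SI constant, half_units below is 2, so
   { 1, 2, 3, 4 } is rewritten in place as { 3, 4, 1, 2 }; each element
   trades places with its counterpart in the other doubleword half.  */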
40562 static void
40563 swap_const_vector_halves (rtx op)
40564 {
40565   int i;
40566   enum rtx_code code = GET_CODE (op);
40567   if (GET_CODE (op) == CONST_VECTOR)
40568     {
40569       int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
40570       for (i = 0; i < half_units; ++i)
40571         {
40572           rtx temp = CONST_VECTOR_ELT (op, i);
40573           CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
40574           CONST_VECTOR_ELT (op, i + half_units) = temp;
40575         }
40576     }
40577   else
40578     {
40579       int j;
40580       const char *fmt = GET_RTX_FORMAT (code);
40581       for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40582         if (fmt[i] == 'e' || fmt[i] == 'u')
40583           swap_const_vector_halves (XEXP (op, i));
40584         else if (fmt[i] == 'E')
40585           for (j = 0; j < XVECLEN (op, i); ++j)
40586             swap_const_vector_halves (XVECEXP (op, i, j));
40587     }
40588 }
40589 
40590 /* Find all subregs of a vector expression that perform a narrowing,
40591 and adjust the subreg index to account for doubleword swapping. */
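/* As an illustration: a narrowing subreg such as (subreg:DF (reg:V2DF) 0)
   names data that a swapped register holds at byte offset 8, so an index
   below 8 is raised by 8 and an index of 8 or more is lowered by 8.  */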
40592 static void
40593 adjust_subreg_index (rtx op)
40594 {
40595   enum rtx_code code = GET_CODE (op);
40596   if (code == SUBREG
40597       && (GET_MODE_SIZE (GET_MODE (op))
40598           < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
40599     {
40600       unsigned int index = SUBREG_BYTE (op);
40601       if (index < 8)
40602         index += 8;
40603       else
40604         index -= 8;
40605       SUBREG_BYTE (op) = index;
40606     }
40607 
40608   const char *fmt = GET_RTX_FORMAT (code);
40609   int i, j;
40610   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40611     if (fmt[i] == 'e' || fmt[i] == 'u')
40612       adjust_subreg_index (XEXP (op, i));
40613     else if (fmt[i] == 'E')
40614       for (j = 0; j < XVECLEN (op, i); ++j)
40615         adjust_subreg_index (XVECEXP (op, i, j));
40616 }
40617 
40618 /* Convert the non-permuting load INSN to a permuting one. */
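/* As an illustration: for a V4SI load, (set (reg) (mem)) becomes
   (set (reg) (vec_select (mem) (parallel [2 3 0 1]))).  The permuting
   load's implicit doubleword swap then cancels against the register
   swaps this pass removes.  */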
40619 static void
40620 permute_load (rtx_insn *insn)
40621 {
40622   rtx body = PATTERN (insn);
40623   rtx mem_op = SET_SRC (body);
40624   rtx tgt_reg = SET_DEST (body);
40625   machine_mode mode = GET_MODE (tgt_reg);
40626   int n_elts = GET_MODE_NUNITS (mode);
40627   int half_elts = n_elts / 2;
40628   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40629   int i, j;
40630   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40631     XVECEXP (par, 0, i) = GEN_INT (j);
40632   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40633     XVECEXP (par, 0, i) = GEN_INT (j);
40634   rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
40635   SET_SRC (body) = sel;
40636   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40637   df_insn_rescan (insn);
40638 
40639   if (dump_file)
40640     fprintf (dump_file, "Replacing load %d with permuted load\n",
40641              INSN_UID (insn));
40642 }
40643 
40644 /* Convert the non-permuting store INSN to a permuting one. */
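/* As an illustration: for a V4SI store, (set (mem) (reg)) becomes
   (set (mem) (vec_select (reg) (parallel [2 3 0 1]))), the mirror
   image of the load transformation above.  */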
40645 static void
40646 permute_store (rtx_insn *insn)
40647 {
40648   rtx body = PATTERN (insn);
40649   rtx src_reg = SET_SRC (body);
40650   machine_mode mode = GET_MODE (src_reg);
40651   int n_elts = GET_MODE_NUNITS (mode);
40652   int half_elts = n_elts / 2;
40653   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40654   int i, j;
40655   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40656     XVECEXP (par, 0, i) = GEN_INT (j);
40657   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40658     XVECEXP (par, 0, i) = GEN_INT (j);
40659   rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
40660   SET_SRC (body) = sel;
40661   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40662   df_insn_rescan (insn);
40663 
40664   if (dump_file)
40665     fprintf (dump_file, "Replacing store %d with permuted store\n",
40666              INSN_UID (insn));
40667 }
40668 
40669 /* Given INSN that contains a vector extract operation, adjust the
40670    index of the extracted lane to account for the doubleword swap.  */
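/* As an illustration: for V4SI, half_elts below is 2, so extracting
   lane 0 from the swapped register is rewritten as extracting lane 2,
   lane 1 as lane 3, lane 2 as lane 0, and lane 3 as lane 1.  */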
40671 static void
40672 adjust_extract (rtx_insn *insn)
40673 {
40674   rtx pattern = PATTERN (insn);
40675   if (GET_CODE (pattern) == PARALLEL)
40676     pattern = XVECEXP (pattern, 0, 0);
40677   rtx src = SET_SRC (pattern);
40678   /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40679      account for that.  */
40680   rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
40681   rtx par = XEXP (sel, 1);
40682   int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
40683   int lane = INTVAL (XVECEXP (par, 0, 0));
40684   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40685   XVECEXP (par, 0, 0) = GEN_INT (lane);
40686   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40687   df_insn_rescan (insn);
40688 
40689   if (dump_file)
40690     fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
40691 }
40692 
40693 /* Given INSN that contains a vector direct-splat operation, adjust the
40694    index of the source lane to account for the doubleword swap.  */
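/* As an illustration: the lane mapping matches the extract case above,
   so a V4SI splat from lane 1 becomes a splat from lane 3, and vice
   versa.  */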
40695 static void
40696 adjust_splat (rtx_insn *insn)
40697 {
40698   rtx body = PATTERN (insn);
40699   rtx unspec = XEXP (body, 1);
40700   int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
40701   int lane = INTVAL (XVECEXP (unspec, 0, 1));
40702   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40703   XVECEXP (unspec, 0, 1) = GEN_INT (lane);
40704   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40705   df_insn_rescan (insn);
40706 
40707   if (dump_file)
40708     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
40709 }
40710 
40711 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
40712    swap), reverse the order of the source operands and adjust the indices
40713    of the source lanes to account for doubleword reversal.  */
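/* As an illustration: an XXPERMDI selecting lanes [0 2] from sources
   <A,B> is rewritten to select lanes [1 3] from <B,A>, following
   new_lane0 = 3 - old_lane1 and new_lane1 = 3 - old_lane0 with the
   two inputs exchanged.  */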
40714 static void
40715 adjust_xxpermdi (rtx_insn *insn)
40716 {
40717   rtx set = PATTERN (insn);
40718   rtx select = XEXP (set, 1);
40719   rtx concat = XEXP (select, 0);
40720   rtx src0 = XEXP (concat, 0);
40721   XEXP (concat, 0) = XEXP (concat, 1);
40722   XEXP (concat, 1) = src0;
40723   rtx parallel = XEXP (select, 1);
40724   int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
40725   int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
40726   int new_lane0 = 3 - lane1;
40727   int new_lane1 = 3 - lane0;
40728   XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
40729   XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
40730   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40731   df_insn_rescan (insn);
40732 
40733   if (dump_file)
40734     fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
40735 }
40736 
40737 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
40738    reverse the order of those inputs.  */
40739 static void
40740 adjust_concat (rtx_insn *insn)
40741 {
40742   rtx set = PATTERN (insn);
40743   rtx concat = XEXP (set, 1);
40744   rtx src0 = XEXP (concat, 0);
40745   XEXP (concat, 0) = XEXP (concat, 1);
40746   XEXP (concat, 1) = src0;
40747   INSN_CODE (insn) = -1; /* Force re-recognition.  */
40748   df_insn_rescan (insn);
40749 
40750   if (dump_file)
40751     fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
40752 }
40753 
40754 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40755 constant pool to reflect swapped doublewords. */
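/* As an illustration: each mask byte indexes the 32-byte concatenation
   of the two source vectors, so adding 8 modulo 16 within each source
   picks the same data after the doubleword swap: selector 0 becomes 8,
   11 becomes 3, 16 becomes 24, and 27 becomes 19.  */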
40756 static void
40757 adjust_vperm (rtx_insn *insn)
40758 {
40759   /* We previously determined that the UNSPEC_VPERM was fed by a
40760      swap of a swapping load of a TOC-relative constant pool symbol.
40761      Find the MEM in the swapping load and replace it with a MEM for
40762      the adjusted mask constant.  */
40763   rtx set = PATTERN (insn);
40764   rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
40765 
40766   /* Find the swap.  */
40767   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40768   df_ref use;
40769   rtx_insn *swap_insn = 0;
40770   FOR_EACH_INSN_INFO_USE (use, insn_info)
40771     if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40772       {
40773         struct df_link *def_link = DF_REF_CHAIN (use);
40774         gcc_assert (def_link && !def_link->next);
40775         swap_insn = DF_REF_INSN (def_link->ref);
40776         break;
40777       }
40778   gcc_assert (swap_insn);
40779 
40780   /* Find the load.  */
40781   insn_info = DF_INSN_INFO_GET (swap_insn);
40782   rtx_insn *load_insn = 0;
40783   FOR_EACH_INSN_INFO_USE (use, insn_info)
40784     {
40785       struct df_link *def_link = DF_REF_CHAIN (use);
40786       gcc_assert (def_link && !def_link->next);
40787       load_insn = DF_REF_INSN (def_link->ref);
40788       break;
40789     }
40790   gcc_assert (load_insn);
40791 
40792   /* Find the TOC-relative symbol access.  */
40793   insn_info = DF_INSN_INFO_GET (load_insn);
40794   rtx_insn *tocrel_insn = 0;
40795   FOR_EACH_INSN_INFO_USE (use, insn_info)
40796     {
40797       struct df_link *def_link = DF_REF_CHAIN (use);
40798       gcc_assert (def_link && !def_link->next);
40799       tocrel_insn = DF_REF_INSN (def_link->ref);
40800       break;
40801     }
40802   gcc_assert (tocrel_insn);
40803 
40804   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
40805      to set tocrel_base; otherwise it would be unnecessary as we've
40806      already established it will return true.  */
40807   rtx base, offset;
40808   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
40809   /* There is an extra level of indirection for small/large code models.  */
40810   if (GET_CODE (tocrel_expr) == MEM)
40811     tocrel_expr = XEXP (tocrel_expr, 0);
40812   if (!toc_relative_expr_p (tocrel_expr, false))
40813     gcc_unreachable ();
40814   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40815   rtx const_vector = get_pool_constant (base);
40816   /* With the extra indirection, get_pool_constant will produce the
40817      real constant from the reg_equal expression, so get the real
40818      constant.  */
40819   if (GET_CODE (const_vector) == SYMBOL_REF)
40820     const_vector = get_pool_constant (const_vector);
40821   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
40822 
40823   /* Create an adjusted mask from the initial mask.  */
40824   unsigned int new_mask[16], i, val;
40825   for (i = 0; i < 16; ++i) {
40826     val = INTVAL (XVECEXP (const_vector, 0, i));
40827     if (val < 16)
40828       new_mask[i] = (val + 8) % 16;
40829     else
40830       new_mask[i] = ((val + 8) % 16) + 16;
40831   }
40832 
40833   /* Create a new CONST_VECTOR and a MEM that references it.  */
40834   rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
40835   for (i = 0; i < 16; ++i)
40836     XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
40837   rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
40838   rtx new_mem = force_const_mem (V16QImode, new_const_vector);
40839   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
40840      can't recognize.  Force the SYMBOL_REF into a register.  */
40841   if (!REG_P (XEXP (new_mem, 0))) {
40842     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
40843     XEXP (new_mem, 0) = base_reg;
40844     /* Move the newly created insn ahead of the load insn.  */
40845     rtx_insn *force_insn = get_last_insn ();
40846     remove_insn (force_insn);
40847     rtx_insn *before_load_insn = PREV_INSN (load_insn);
40848     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
40849     df_insn_rescan (before_load_insn);
40850     df_insn_rescan (force_insn);
40851   }
40852 
40853   /* Replace the MEM in the load instruction and rescan it.  */
40854   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
40855   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
40856   df_insn_rescan (load_insn);
40857 
40858   if (dump_file)
40859     fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
40860 }
40861 
40862 /* The insn described by INSN_ENTRY[I] can be swapped, but only
40863 with special handling. Take care of that here. */
40864 static void
40865 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
40866 {
40867   rtx_insn *insn = insn_entry[i].insn;
40868   rtx body = PATTERN (insn);
40869 
40870   switch (insn_entry[i].special_handling)
40871     {
40872     default:
40873       gcc_unreachable ();
40874     case SH_CONST_VECTOR:
40875       {
40876         /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
40877         gcc_assert (GET_CODE (body) == SET);
40878         rtx rhs = SET_SRC (body);
40879         swap_const_vector_halves (rhs);
40880         if (dump_file)
40881           fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
40882         break;
40883       }
40884     case SH_SUBREG:
40885       /* A subreg of the same size is already safe.  For subregs that
40886          select a smaller portion of a reg, adjust the index for
40887          swapped doublewords.  */
40888       adjust_subreg_index (body);
40889       if (dump_file)
40890         fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
40891       break;
40892     case SH_NOSWAP_LD:
40893       /* Convert a non-permuting load to a permuting one.  */
40894       permute_load (insn);
40895       break;
40896     case SH_NOSWAP_ST:
40897       /* Convert a non-permuting store to a permuting one.  */
40898       permute_store (insn);
40899       break;
40900     case SH_EXTRACT:
40901       /* Change the lane on an extract operation.  */
40902       adjust_extract (insn);
40903       break;
40904     case SH_SPLAT:
40905       /* Change the lane on a direct-splat operation.  */
40906       adjust_splat (insn);
40907       break;
40908     case SH_XXPERMDI:
40909       /* Change the lanes on an XXPERMDI operation.  */
40910       adjust_xxpermdi (insn);
40911       break;
40912     case SH_CONCAT:
40913       /* Reverse the order of a concatenation operation.  */
40914       adjust_concat (insn);
40915       break;
40916     case SH_VPERM:
40917       /* Change the mask loaded from the constant pool for a VPERM.  */
40918       adjust_vperm (insn);
40919       break;
40920     }
40921 }
40922 
40923 /* Find the insn from the Ith table entry, which is known to be a
40924 register swap Y = SWAP(X). Replace it with a copy Y = X. */
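/* As an illustration: a V2DI swap (set (reg Y) (vec_select:V2DI (reg X)
   (parallel [1 0]))) is replaced by the plain copy (set (reg Y) (reg X)),
   which later passes can propagate away.  */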
40925 static void
40926 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
40927 {
40928   rtx_insn *insn = insn_entry[i].insn;
40929   rtx body = PATTERN (insn);
40930   rtx src_reg = XEXP (SET_SRC (body), 0);
40931   rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
40932   rtx_insn *new_insn = emit_insn_before (copy, insn);
40933   set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
40934   df_insn_rescan (new_insn);
40935 
40936   if (dump_file)
40937     {
40938       unsigned int new_uid = INSN_UID (new_insn);
40939       fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
40940     }
40941 
40942   df_insn_delete (insn);
40943   remove_insn (insn);
40944   insn->set_deleted ();
40945 }
40946 
40947 /* Dump the swap table to DUMP_FILE. */
40948 static void
40949 dump_swap_insn_table (swap_web_entry *insn_entry)
40950 {
40951   int e = get_max_uid ();
40952   fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
40953 
40954   for (int i = 0; i < e; ++i)
40955     if (insn_entry[i].is_relevant)
40956       {
40957         swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
40958         fprintf (dump_file, "%6d %6d ", i,
40959                  pred_entry && pred_entry->insn
40960                  ? INSN_UID (pred_entry->insn) : 0);
40961         if (insn_entry[i].is_load)
40962           fputs ("load ", dump_file);
40963         if (insn_entry[i].is_store)
40964           fputs ("store ", dump_file);
40965         if (insn_entry[i].is_swap)
40966           fputs ("swap ", dump_file);
40967         if (insn_entry[i].is_live_in)
40968           fputs ("live-in ", dump_file);
40969         if (insn_entry[i].is_live_out)
40970           fputs ("live-out ", dump_file);
40971         if (insn_entry[i].contains_subreg)
40972           fputs ("subreg ", dump_file);
40973         if (insn_entry[i].is_128_int)
40974           fputs ("int128 ", dump_file);
40975         if (insn_entry[i].is_call)
40976           fputs ("call ", dump_file);
40977         if (insn_entry[i].is_swappable)
40978           {
40979             fputs ("swappable ", dump_file);
40980             if (insn_entry[i].special_handling == SH_CONST_VECTOR)
40981               fputs ("special:constvec ", dump_file);
40982             else if (insn_entry[i].special_handling == SH_SUBREG)
40983               fputs ("special:subreg ", dump_file);
40984             else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
40985               fputs ("special:load ", dump_file);
40986             else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
40987               fputs ("special:store ", dump_file);
40988             else if (insn_entry[i].special_handling == SH_EXTRACT)
40989               fputs ("special:extract ", dump_file);
40990             else if (insn_entry[i].special_handling == SH_SPLAT)
40991               fputs ("special:splat ", dump_file);
40992             else if (insn_entry[i].special_handling == SH_XXPERMDI)
40993               fputs ("special:xxpermdi ", dump_file);
40994             else if (insn_entry[i].special_handling == SH_CONCAT)
40995               fputs ("special:concat ", dump_file);
40996             else if (insn_entry[i].special_handling == SH_VPERM)
40997               fputs ("special:vperm ", dump_file);
40998           }
40999         if (insn_entry[i].web_not_optimizable)
41000           fputs ("unoptimizable ", dump_file);
41001         if (insn_entry[i].will_delete)
41002           fputs ("delete ", dump_file);
41003         fputs ("\n", dump_file);
41004       }
41005   fputs ("\n", dump_file);
41006 }
41007 
41008 /* Return RTX with its address canonicalized to (reg) or (plus reg reg).
41009    Here RTX is an (and addr (const_int -16)).  Always return a new copy
41010    to avoid problems with combine.  */
41011 static rtx
41012 alignment_with_canonical_addr (rtx align)
41013 {
41014   rtx canon;
41015   rtx addr = XEXP (align, 0);
41016 
41017   if (REG_P (addr))
41018     canon = addr;
41019 
41020   else if (GET_CODE (addr) == PLUS)
41021     {
41022       rtx addrop0 = XEXP (addr, 0);
41023       rtx addrop1 = XEXP (addr, 1);
41024 
41025       if (!REG_P (addrop0))
41026         addrop0 = force_reg (GET_MODE (addrop0), addrop0);
41027 
41028       if (!REG_P (addrop1))
41029         addrop1 = force_reg (GET_MODE (addrop1), addrop1);
41030 
41031       canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
41032     }
41033 
41034   else
41035     canon = force_reg (GET_MODE (addr), addr);
41036 
41037   return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
41038 }
41039 
41040 /* Check whether an rtx is an alignment mask, and if so, return
41041 a fully-expanded rtx for the masking operation. */
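/* As an illustration: for (set (reg X) (and (reg Y) (const_int -16)))
   we return a fresh copy of (and (reg Y) (const_int -16)); when the
   mask is instead in a register, we require its single reaching
   definition to load the constant -16 before returning the same form.  */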
41042 static rtx
41043 alignment_mask (rtx_insn *insn)
41044 {
41045   rtx body = PATTERN (insn);
41046 
41047   if (GET_CODE (body) != SET
41048       || GET_CODE (SET_SRC (body)) != AND
41049       || !REG_P (XEXP (SET_SRC (body), 0)))
41050     return 0;
41051 
41052   rtx mask = XEXP (SET_SRC (body), 1);
41053 
41054   if (GET_CODE (mask) == CONST_INT)
41055     {
41056       if (INTVAL (mask) == -16)
41057         return alignment_with_canonical_addr (SET_SRC (body));
41058       else
41059         return 0;
41060     }
41061 
41062   if (!REG_P (mask))
41063     return 0;
41064 
41065   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41066   df_ref use;
41067   rtx real_mask = 0;
41068 
41069   FOR_EACH_INSN_INFO_USE (use, insn_info)
41070     {
41071       if (!rtx_equal_p (DF_REF_REG (use), mask))
41072         continue;
41073 
41074       struct df_link *def_link = DF_REF_CHAIN (use);
41075       if (!def_link || def_link->next)
41076         return 0;
41077 
41078       rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
41079       rtx const_body = PATTERN (const_insn);
41080       if (GET_CODE (const_body) != SET)
41081         return 0;
41082 
41083       real_mask = SET_SRC (const_body);
41084 
41085       if (GET_CODE (real_mask) != CONST_INT
41086           || INTVAL (real_mask) != -16)
41087         return 0;
41088     }
41089 
41090   if (real_mask == 0)
41091     return 0;
41092 
41093   return alignment_with_canonical_addr (SET_SRC (body));
41094 }
41095 
41096 /* Given INSN that's a load or store based at BASE_REG, look for a
41097 feeding computation that aligns its address on a 16-byte boundary. */
41098 static rtx
41099 find_alignment_op (rtx_insn *insn, rtx base_reg)
41100 {
41101   df_ref base_use;
41102   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41103   rtx and_operation = 0;
41104 
41105   FOR_EACH_INSN_INFO_USE (base_use, insn_info)
41106     {
41107       if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
41108         continue;
41109 
41110       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
41111       if (!base_def_link || base_def_link->next)
41112         break;
41113 
41114       rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
41115       and_operation = alignment_mask (and_insn);
41116       if (and_operation != 0)
41117         break;
41118     }
41119 
41120   return and_operation;
41121 }
41122 
41123 struct del_info { bool replace; rtx_insn *replace_insn; };
41125 /* If INSN is the load for an lvx pattern, put it in canonical form. */
41126 static void
41127 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
41128 {
41129   rtx body = PATTERN (insn);
41130   gcc_assert (GET_CODE (body) == SET
41131               && GET_CODE (SET_SRC (body)) == VEC_SELECT
41132               && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
41133 
41134   rtx mem = XEXP (SET_SRC (body), 0);
41135   rtx base_reg = XEXP (mem, 0);
41136 
41137   rtx and_operation = find_alignment_op (insn, base_reg);
41138 
41139   if (and_operation != 0)
41140     {
41141       df_ref def;
41142       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41143       FOR_EACH_INSN_INFO_DEF (def, insn_info)
41144         {
41145           struct df_link *link = DF_REF_CHAIN (def);
41146           if (!link || link->next)
41147             break;
41148 
41149           rtx_insn *swap_insn = DF_REF_INSN (link->ref);
41150           if (!insn_is_swap_p (swap_insn)
41151               || insn_is_load_p (swap_insn)
41152               || insn_is_store_p (swap_insn))
41153             break;
41154 
41155           /* Expected lvx pattern found.  Change the swap to
41156              a copy, and propagate the AND operation into the
41157              load.  */
41158           to_delete[INSN_UID (swap_insn)].replace = true;
41159           to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
41160 
41161           XEXP (mem, 0) = and_operation;
41162           SET_SRC (body) = mem;
41163           INSN_CODE (insn) = -1; /* Force re-recognition.  */
41164           df_insn_rescan (insn);
41165 
41166           if (dump_file)
41167             fprintf (dump_file, "lvx opportunity found at %d\n",
41168                      INSN_UID (insn));
41169         }
41170     }
41171 }
41172 
41173 /* If INSN is the store for an stvx pattern, put it in canonical form. */
41174 static void
41175 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
41176 {
41177   rtx body = PATTERN (insn);
41178   gcc_assert (GET_CODE (body) == SET
41179               && GET_CODE (SET_DEST (body)) == MEM
41180               && GET_CODE (SET_SRC (body)) == VEC_SELECT);
41181   rtx mem = SET_DEST (body);
41182   rtx base_reg = XEXP (mem, 0);
41183 
41184   rtx and_operation = find_alignment_op (insn, base_reg);
41185 
41186   if (and_operation != 0)
41187     {
41188       rtx src_reg = XEXP (SET_SRC (body), 0);
41189       df_ref src_use;
41190       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41191       FOR_EACH_INSN_INFO_USE (src_use, insn_info)
41192         {
41193           if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
41194             continue;
41195 
41196           struct df_link *link = DF_REF_CHAIN (src_use);
41197           if (!link || link->next)
41198             break;
41199 
41200           rtx_insn *swap_insn = DF_REF_INSN (link->ref);
41201           if (!insn_is_swap_p (swap_insn)
41202               || insn_is_load_p (swap_insn)
41203               || insn_is_store_p (swap_insn))
41204             break;
41205 
41206           /* Expected stvx pattern found.  Change the swap to
41207              a copy, and propagate the AND operation into the
41208              store.  */
41209           to_delete[INSN_UID (swap_insn)].replace = true;
41210           to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
41211 
41212           XEXP (mem, 0) = and_operation;
41213           SET_SRC (body) = src_reg;
41214           INSN_CODE (insn) = -1; /* Force re-recognition.  */
41215           df_insn_rescan (insn);
41216 
41217           if (dump_file)
41218             fprintf (dump_file, "stvx opportunity found at %d\n",
41219                      INSN_UID (insn));
41220         }
41221     }
41222 }
41223 
41224 /* Look for patterns created from builtin lvx and stvx calls, and
41225 canonicalize them to be properly recognized as such. */
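/* As an illustration: an expanded lvx builtin reaches this pass as a
   permuting load whose address was first masked with -16 and whose
   result feeds a register swap.  Folding the AND into the address and
   queueing the swap for replacement by a copy restores the form that
   matches the lvx insn pattern; stores are handled symmetrically.  */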
41226 static void
41227 recombine_lvx_stvx_patterns (function *fun)
41228 {
41229   int i;
41230   basic_block bb;
41231   rtx_insn *insn;
41232 
41233   int num_insns = get_max_uid ();
41234   del_info *to_delete = XCNEWVEC (del_info, num_insns);
41235 
41236   FOR_ALL_BB_FN (bb, fun)
41237     FOR_BB_INSNS (bb, insn)
41238       {
41239         if (!NONDEBUG_INSN_P (insn))
41240           continue;
41241 
41242         if (insn_is_load_p (insn) && insn_is_swap_p (insn))
41243           recombine_lvx_pattern (insn, to_delete);
41244         else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
41245           recombine_stvx_pattern (insn, to_delete);
41246       }
41247 
41248   /* Turning swaps into copies is delayed until now, to avoid problems
41249      with deleting instructions during the insn walk.  */
41250   for (i = 0; i < num_insns; i++)
41251     if (to_delete[i].replace)
41252       {
41253         rtx swap_body = PATTERN (to_delete[i].replace_insn);
41254         rtx src_reg = XEXP (SET_SRC (swap_body), 0);
41255         rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
41256         rtx_insn *new_insn = emit_insn_before (copy,
41257                                                to_delete[i].replace_insn);
41258         set_block_for_insn (new_insn,
41259                             BLOCK_FOR_INSN (to_delete[i].replace_insn));
41260         df_insn_rescan (new_insn);
41261         df_insn_delete (to_delete[i].replace_insn);
41262         remove_insn (to_delete[i].replace_insn);
41263         to_delete[i].replace_insn->set_deleted ();
41264       }
41265 
41266   free (to_delete);
41267 }
41268 
41269 /* Main entry point for this pass. */
41270 unsigned int
41271 rs6000_analyze_swaps (function *fun)
41272 {
41273   swap_web_entry *insn_entry;
41274   basic_block bb;
41275   rtx_insn *insn, *curr_insn = 0;
41276 
41277   /* Dataflow analysis for use-def chains.  */
41278   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
41279   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
41280   df_analyze ();
41281   df_set_flags (DF_DEFER_INSN_RESCAN);
41282 
41283   /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
41284   recombine_lvx_stvx_patterns (fun);
41285 
41286   /* Allocate structure to represent webs of insns.  */
41287   insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
41288 
41289   /* Walk the insns to gather basic data.  */
41290   FOR_ALL_BB_FN (bb, fun)
41291     FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
41292       {
41293         unsigned int uid = INSN_UID (insn);
41294         if (NONDEBUG_INSN_P (insn))
41295           {
41296             insn_entry[uid].insn = insn;
41297 
41298             if (GET_CODE (insn) == CALL_INSN)
41299               insn_entry[uid].is_call = 1;
41300 
41301             /* Walk the uses and defs to see if we mention vector regs.
41302                Record any constraints on optimization of such mentions.  */
41303             struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41304             df_ref mention;
41305             FOR_EACH_INSN_INFO_USE (mention, insn_info)
41306               {
41307                 /* We use DF_REF_REAL_REG here to get inside any subregs.  */
41308                 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41309 
41310                 /* If a use gets its value from a call insn, it will be
41311                    a hard register and will look like (reg:V4SI 3 3).
41312                    The df analysis creates two mentions for GPR3 and GPR4,
41313                    both DImode.  We must recognize this and treat it as a
41314                    vector mention to ensure the call is unioned with this
41315                    use.  */
41316                 if (mode == DImode && DF_REF_INSN_INFO (mention))
41317                   {
41318                     rtx feeder = DF_REF_INSN (mention);
41319                     /* FIXME: It is pretty hard to get from the df mention
41320                        to the mode of the use in the insn.  We arbitrarily
41321                        pick a vector mode here, even though the use might
41322                        be a real DImode.  We can be too conservative
41323                        (create a web larger than necessary) because of
41324                        this, so consider eventually fixing this.  */
41325                     if (GET_CODE (feeder) == CALL_INSN)
41326                       mode = V4SImode;
41327                   }
41328 
41329                 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41330                   {
41331                     insn_entry[uid].is_relevant = 1;
41332                     if (mode == TImode || mode == V1TImode
41333                         || FLOAT128_VECTOR_P (mode))
41334                       insn_entry[uid].is_128_int = 1;
41335                     if (DF_REF_INSN_INFO (mention))
41336                       insn_entry[uid].contains_subreg
41337                         = !rtx_equal_p (DF_REF_REG (mention),
41338                                         DF_REF_REAL_REG (mention));
41339                     union_defs (insn_entry, insn, mention);
41340                   }
41341               }
41342             FOR_EACH_INSN_INFO_DEF (mention, insn_info)
41343               {
41344                 /* We use DF_REF_REAL_REG here to get inside any subregs.  */
41345                 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41346 
41347                 /* If we're loading up a hard vector register for a call,
41348                    it looks like (set (reg:V4SI 9 9) (...)).  The df
41349                    analysis creates two mentions for GPR9 and GPR10, both
41350                    DImode.  So relying on the mode from the mentions
41351                    isn't sufficient to ensure we union the call into the
41352                    web with the parameter setup code.  */
41353                 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
41354                     && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
41355                   mode = GET_MODE (SET_DEST (PATTERN (insn)));
41356 
41357                 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41358                   {
41359                     insn_entry[uid].is_relevant = 1;
41360                     if (mode == TImode || mode == V1TImode
41361                         || FLOAT128_VECTOR_P (mode))
41362                       insn_entry[uid].is_128_int = 1;
41363                     if (DF_REF_INSN_INFO (mention))
41364                       insn_entry[uid].contains_subreg
41365                         = !rtx_equal_p (DF_REF_REG (mention),
41366                                         DF_REF_REAL_REG (mention));
41367                     /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
41368                     else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
41369                       insn_entry[uid].is_live_out = 1;
41370                     union_uses (insn_entry, insn, mention);
41371                   }
41372               }
41373 
41374             if (insn_entry[uid].is_relevant)
41375               {
41376                 /* Determine if this is a load or store.  */
41377                 insn_entry[uid].is_load = insn_is_load_p (insn);
41378                 insn_entry[uid].is_store = insn_is_store_p (insn);
41379 
41380                 /* Determine if this is a doubleword swap.  If not,
41381                    determine whether it can legally be swapped.  */
41382                 if (insn_is_swap_p (insn))
41383                   insn_entry[uid].is_swap = 1;
41384                 else
41385                   {
41386                     unsigned int special = SH_NONE;
41387                     insn_entry[uid].is_swappable
41388                       = insn_is_swappable_p (insn_entry, insn, &special);
41389                     if (special != SH_NONE && insn_entry[uid].contains_subreg)
41390                       insn_entry[uid].is_swappable = 0;
41391                     else if (special != SH_NONE)
41392                       insn_entry[uid].special_handling = special;
41393                     else if (insn_entry[uid].contains_subreg)
41394                       insn_entry[uid].special_handling = SH_SUBREG;
41395                   }
41396               }
41397           }
41398       }
41399 
41400   if (dump_file)
41401     {
41402       fprintf (dump_file, "\nSwap insn entry table when first built\n");
41403       dump_swap_insn_table (insn_entry);
41404     }
41405 
41406   /* Record unoptimizable webs.  */
41407   unsigned e = get_max_uid (), i;
41408   for (i = 0; i < e; ++i)
41409     {
41410       if (!insn_entry[i].is_relevant)
41411         continue;
41412 
41413       swap_web_entry *root
41414         = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
41415 
41416       if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
41417           || (insn_entry[i].contains_subreg
41418               && insn_entry[i].special_handling != SH_SUBREG)
41419           || insn_entry[i].is_128_int || insn_entry[i].is_call
41420           || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
41421         root->web_not_optimizable = 1;
41422 
41423       /* If we have loads or stores that aren't permuting then the
41424          optimization isn't appropriate.  */
41425       else if ((insn_entry[i].is_load || insn_entry[i].is_store)
41426                && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
41427         root->web_not_optimizable = 1;
41428 
41429       /* If we have permuting loads or stores that are not accompanied
41430          by a register swap, the optimization isn't appropriate.  */
41431       else if (insn_entry[i].is_load && insn_entry[i].is_swap)
41432         {
41433           rtx insn = insn_entry[i].insn;
41434           struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41435           df_ref def;
41436 
41437           FOR_EACH_INSN_INFO_DEF (def, insn_info)
41438             {
41439               struct df_link *link = DF_REF_CHAIN (def);
41440 
41441               if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
41442                 {
41443                   root->web_not_optimizable = 1;
41444                   break;
41445                 }
41446             }
41447         }
41448       else if (insn_entry[i].is_store && insn_entry[i].is_swap)
41449         {
41450           rtx insn = insn_entry[i].insn;
41451           struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41452           df_ref use;
41453 
41454           FOR_EACH_INSN_INFO_USE (use, insn_info)
41455             {
41456               struct df_link *link = DF_REF_CHAIN (use);
41457 
41458               if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
41459                 {
41460                   root->web_not_optimizable = 1;
41461                   break;
41462                 }
41463             }
41464         }
41465     }
41466 
41467   if (dump_file)
41468     {
41469       fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
41470       dump_swap_insn_table (insn_entry);
41471     }
41472 
41473   /* For each load and store in an optimizable web (which implies
41474      the loads and stores are permuting), find the associated
41475      register swaps and mark them for removal.  Due to various
41476      optimizations we may mark the same swap more than once.  Also
41477      perform special handling for swappable insns that require it.  */
41478   for (i = 0; i < e; ++i)
41479     if ((insn_entry[i].is_load || insn_entry[i].is_store)
41480         && insn_entry[i].is_swap)
41481       {
41482         swap_web_entry* root_entry
41483           = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41484         if (!root_entry->web_not_optimizable)
41485           mark_swaps_for_removal (insn_entry, i);
41486       }
41487     else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
41488       {
41489         swap_web_entry* root_entry
41490           = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41491         if (!root_entry->web_not_optimizable)
41492           handle_special_swappables (insn_entry, i);
41493       }
41494 
41495   /* Now delete the swaps marked for removal.  */
41496   for (i = 0; i < e; ++i)
41497     if (insn_entry[i].will_delete)
41498       replace_swap_with_copy (insn_entry, i);
41499 
41500   /* Clean up.  */
41501   free (insn_entry);
41502   return 0;
41503 }
41504 
41505 const pass_data pass_data_analyze_swaps =
41506 {
41507   RTL_PASS, /* type */
41508   "swaps", /* name */
41509   OPTGROUP_NONE, /* optinfo_flags */
41510   TV_NONE, /* tv_id */
41511   0, /* properties_required */
41512   0, /* properties_provided */
41513   0, /* properties_destroyed */
41514   0, /* todo_flags_start */
41515   TODO_df_finish, /* todo_flags_finish */
41516 };
41517 
41518 class pass_analyze_swaps : public rtl_opt_pass
41519 {
41520 public:
41521   pass_analyze_swaps(gcc::context *ctxt)
41522     : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
41523   {}
41524 
41525   /* opt_pass methods: */
41526   virtual bool gate (function *)
41527     {
41528       return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
41529               && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
41530     }
41531 
41532   virtual unsigned int execute (function *fun)
41533     {
41534       return rs6000_analyze_swaps (fun);
41535     }
41536 
41537 }; // class pass_analyze_swaps
41538 
41539 rtl_opt_pass *
41540 make_pass_analyze_swaps (gcc::context *ctxt)
41541 {
41542   return new pass_analyze_swaps (ctxt);
41543 }
41544 
41545 #ifdef RS6000_GLIBC_ATOMIC_FENV
41546 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
41547 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
41548 #endif
41550 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
41552 static void
41553 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
41554 {
41555   if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
41556     {
41557 #ifdef RS6000_GLIBC_ATOMIC_FENV
41558       if (atomic_hold_decl == NULL_TREE)
41559         {
41560           atomic_hold_decl
41561             = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41562                           get_identifier ("__atomic_feholdexcept"),
41563                           build_function_type_list (void_type_node,
41564                                                     double_ptr_type_node,
41565                                                     NULL_TREE));
41566           TREE_PUBLIC (atomic_hold_decl) = 1;
41567           DECL_EXTERNAL (atomic_hold_decl) = 1;
41568         }
41569 
41570       if (atomic_clear_decl == NULL_TREE)
41571         {
41572           atomic_clear_decl
41573             = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41574                           get_identifier ("__atomic_feclearexcept"),
41575                           build_function_type_list (void_type_node,
41576                                                     NULL_TREE));
41577           TREE_PUBLIC (atomic_clear_decl) = 1;
41578           DECL_EXTERNAL (atomic_clear_decl) = 1;
41579         }
41580 
41581       tree const_double = build_qualified_type (double_type_node,
41582                                                 TYPE_QUAL_CONST);
41583       tree const_double_ptr = build_pointer_type (const_double);
41584       if (atomic_update_decl == NULL_TREE)
41585         {
41586           atomic_update_decl
41587             = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41588                           get_identifier ("__atomic_feupdateenv"),
41589                           build_function_type_list (void_type_node,
41590                                                     const_double_ptr,
41591                                                     NULL_TREE));
41592           TREE_PUBLIC (atomic_update_decl) = 1;
41593           DECL_EXTERNAL (atomic_update_decl) = 1;
41594         }
41595 
41596       tree fenv_var = create_tmp_var_raw (double_type_node);
41597       TREE_ADDRESSABLE (fenv_var) = 1;
41598       tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
41599 
41600       *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
41601       *clear = build_call_expr (atomic_clear_decl, 0);
41602       *update = build_call_expr (atomic_update_decl, 1,
41603                                  fold_convert (const_double_ptr, fenv_addr));
41604 #endif
41605       return;
41606     }
41607 
41608   tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
41609   tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
41610   tree call_mffs = build_call_expr (mffs, 0);
41611 
41612   /* Generates the equivalent of feholdexcept (&fenv_var)
41613 
41614      *fenv_var = __builtin_mffs ();
41615      double fenv_hold;
41616      *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
41617      __builtin_mtfsf (0xff, fenv_hold);  */
41618 
41619   /* Mask to clear everything except for the rounding modes and non-IEEE
41620      arithmetic flag.  */
41621   const unsigned HOST_WIDE_INT hold_exception_mask =
41622     HOST_WIDE_INT_C (0xffffffff00000007);
41623 
41624   tree fenv_var = create_tmp_var_raw (double_type_node);
41625 
41626   tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
41627 
41628   tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
41629   tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41630                               build_int_cst (uint64_type_node,
41631                                              hold_exception_mask));
41632 
41633   tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41634                                  fenv_llu_and);
41635 
41636   tree hold_mtfsf = build_call_expr (mtfsf, 2,
41637                                      build_int_cst (unsigned_type_node, 0xff),
41638                                      fenv_hold_mtfsf);
41639 
41640   *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
41641 
41642   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
41643 
41644      double fenv_clear = __builtin_mffs ();
41645      *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
41646      __builtin_mtfsf (0xff, fenv_clear);  */
41647 
41648   /* Mask to clear everything, including the rounding modes and the
41649      non-IEEE arithmetic flag.  */
41650   const unsigned HOST_WIDE_INT clear_exception_mask =
41651     HOST_WIDE_INT_C (0xffffffff00000000);
41652 
41653   tree fenv_clear = create_tmp_var_raw (double_type_node);
41654 
41655   tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
41656 
41657   tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
41658   tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
41659                                     fenv_clean_llu,
41660                                     build_int_cst (uint64_type_node,
41661                                                    clear_exception_mask));
41662 
41663   tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41664                                   fenv_clear_llu_and);
41665 
41666   tree clear_mtfsf = build_call_expr (mtfsf, 2,
41667                                       build_int_cst (unsigned_type_node, 0xff),
41668                                       fenv_clear_mtfsf);
41669 
41670   *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
41671 
41672   /* Generates the equivalent of feupdateenv (&fenv_var)
41673 
41674      double old_fenv = __builtin_mffs ();
41675      double fenv_update;
41676      *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
41677                                 (*(uint64_t*)fenv_var & 0x1ff80fff);
41678      __builtin_mtfsf (0xff, fenv_update);  */
41679 
41680   const unsigned HOST_WIDE_INT update_exception_mask =
41681     HOST_WIDE_INT_C (0xffffffff1fffff00);
41682   const unsigned HOST_WIDE_INT new_exception_mask =
41683     HOST_WIDE_INT_C (0x1ff80fff);
41684 
41685   tree old_fenv = create_tmp_var_raw (double_type_node);
41686   tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
41687 
41688   tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
41689   tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
41690                              build_int_cst (uint64_type_node,
41691                                             update_exception_mask));
41692 
41693   tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41694                              build_int_cst (uint64_type_node,
41695                                             new_exception_mask));
41696 
41697   tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
41698                               old_llu_and, new_llu_and);
41699 
41700   tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41701                                    new_llu_mask);
41702 
41703   tree update_mtfsf = build_call_expr (mtfsf, 2,
41704                                        build_int_cst (unsigned_type_node, 0xff),
41705                                        fenv_update_mtfsf);
41706 
41707   *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
41708 }
41709 
41710 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
41712 static bool
41713 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
41714                           optimization_type opt_type)
41715 {
41716   switch (op)
41717     {
41718     case rsqrt_optab:
41719       return (opt_type == OPTIMIZE_FOR_SPEED
41720               && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
41721 
41722     default:
41723       return true;
41724     }
41725 }
41726 
41727 struct gcc_target targetm = TARGET_INITIALIZER;
41729 #include "gt-rs6000.h"