rs6000: Don't forget to initialize the TOC (PR77957)
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs  */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call to
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
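
/* For instance, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true because
   ALTIVEC_REG_TYPE sits between GPR_REG_TYPE and FPR_REG_TYPE in the enum
   above, while IS_STD_REG_TYPE (CR_REG_TYPE) is false; this is exactly the
   ordering assumption described before the enum.  */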
/* Register classes we care about in secondary reload or when checking for
   legitimate addresses.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
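
/* Example: an addr_mask value of
     (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET)
   describes a mode/register-class pair that is valid in the register and
   accepts reg+reg and reg+offset addresses, but allows no pre-increment,
   pre-decrement, or pre-modify forms.  */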
/* Reload insns and valid address masks for each mode, broken out by
   register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	 /* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
  bool fused_toc;		 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
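
/* Illustration (hypothetical mask setup): if the initialization code sets
   RELOAD_REG_QUAD_OFFSET in reg_addr[TImode].addr_mask[RELOAD_REG_ANY],
   then mode_supports_vsx_dform_quad (TImode) returns true and only
   16-byte-aligned offsets are accepted for that mode, per the comment
   above.  */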
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
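
/* A sketch of how this pointer is used (the selection logic lives in the
   option-override code, not here): rs6000_cost is pointed at one of the
   tables below for the cpu being tuned for, and the rtx cost hooks then
   read fields such as rs6000_cost->mulsi to price a SImode multiply.  The
   size32_cost/size64_cost tables exist for sizing rather than speed.  */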
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
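
/* Illustration of the X-macro expansion above (the .def entry shown is
   hypothetical): a line such as
     RS6000_BUILTIN_2 (SOME_ENUM, "__builtin_name", MASK_BITS, ATTR_BITS,
		       CODE_FOR_some_insn)
   in rs6000-builtin.def expands to the initializer
     { "__builtin_name", CODE_FOR_some_insn, MASK_BITS, ATTR_BITS },
   so rs6000_builtin_info gets one entry per builtin, in .def-file order.  */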
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							    enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO)	(0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
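
/* E.g. ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) is 0x80000000 >> 2
   == 0x20000000, the VRSAVE bit for %v2.  */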
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
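
/* With the operator strings above, a 4-byte unaligned integer is emitted
   as ".vbyte 4,<value>" on XCOFF and as ".long <value>" on Darwin,
   where <value> stands for the actual operand.  */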
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
1672 #define TARGET_RTX_COSTS rs6000_rtx_costs
1673 #undef TARGET_ADDRESS_COST
1674 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1676 #undef TARGET_DWARF_REGISTER_SPAN
1677 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1679 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1680 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1682 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1683 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1685 #undef TARGET_PROMOTE_FUNCTION_MODE
1686 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1688 #undef TARGET_RETURN_IN_MEMORY
1689 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1691 #undef TARGET_RETURN_IN_MSB
1692 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1694 #undef TARGET_SETUP_INCOMING_VARARGS
1695 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1697 /* Always use strict argument naming on rs6000.  */
1698 #undef TARGET_STRICT_ARGUMENT_NAMING
1699 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1700 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1701 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1702 #undef TARGET_SPLIT_COMPLEX_ARG
1703 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1704 #undef TARGET_MUST_PASS_IN_STACK
1705 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1706 #undef TARGET_PASS_BY_REFERENCE
1707 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1708 #undef TARGET_ARG_PARTIAL_BYTES
1709 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1710 #undef TARGET_FUNCTION_ARG_ADVANCE
1711 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1712 #undef TARGET_FUNCTION_ARG
1713 #define TARGET_FUNCTION_ARG rs6000_function_arg
1714 #undef TARGET_FUNCTION_ARG_BOUNDARY
1715 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1717 #undef TARGET_BUILD_BUILTIN_VA_LIST
1718 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1720 #undef TARGET_EXPAND_BUILTIN_VA_START
1721 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1723 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1724 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1726 #undef TARGET_EH_RETURN_FILTER_MODE
1727 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1729 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1730 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1732 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1733 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1735 #undef TARGET_FLOATN_MODE
1736 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1738 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1739 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1741 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1742 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1744 #undef TARGET_MD_ASM_ADJUST
1745 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1747 #undef TARGET_OPTION_OVERRIDE
1748 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1750 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1751 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1752 rs6000_builtin_vectorized_function
1754 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1755 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1756 rs6000_builtin_md_vectorized_function
1758 #ifdef TARGET_THREAD_SSP_OFFSET
1759 #undef TARGET_STACK_PROTECT_GUARD
1760 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
1761 #endif
1763 #if !TARGET_MACHO
1764 #undef TARGET_STACK_PROTECT_FAIL
1765 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1766 #endif
1768 #ifdef HAVE_AS_TLS
1769 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1770 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1771 #endif
1773 /* Use a 32-bit anchor range. This leads to sequences like:
1775 addis tmp,anchor,high
1776 add dest,tmp,low
1778 where tmp itself acts as an anchor, and can be shared between
1779 accesses to the same 64k page. */
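/* At the C level this corresponds to something like the following
   sketch (illustrative only, not part of this file), where two globals
   placed within the same 64k block can share one addis-established
   anchor:

     extern int a, b;
     int sum_ab (void) { return a + b; }  */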
1780 #undef TARGET_MIN_ANCHOR_OFFSET
1781 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1782 #undef TARGET_MAX_ANCHOR_OFFSET
1783 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1784 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1785 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1786 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1787 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1789 #undef TARGET_BUILTIN_RECIPROCAL
1790 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1792 #undef TARGET_EXPAND_TO_RTL_HOOK
1793 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1795 #undef TARGET_INSTANTIATE_DECLS
1796 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1798 #undef TARGET_SECONDARY_RELOAD
1799 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1801 #undef TARGET_LEGITIMATE_ADDRESS_P
1802 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1804 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1805 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1807 #undef TARGET_LRA_P
1808 #define TARGET_LRA_P rs6000_lra_p
1810 #undef TARGET_CAN_ELIMINATE
1811 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1813 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1814 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1816 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1817 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1819 #undef TARGET_TRAMPOLINE_INIT
1820 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1822 #undef TARGET_FUNCTION_VALUE
1823 #define TARGET_FUNCTION_VALUE rs6000_function_value
1825 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1826 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1828 #undef TARGET_OPTION_SAVE
1829 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1831 #undef TARGET_OPTION_RESTORE
1832 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1834 #undef TARGET_OPTION_PRINT
1835 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1837 #undef TARGET_CAN_INLINE_P
1838 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1840 #undef TARGET_SET_CURRENT_FUNCTION
1841 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1843 #undef TARGET_LEGITIMATE_CONSTANT_P
1844 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1846 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1847 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1849 #undef TARGET_CAN_USE_DOLOOP_P
1850 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1852 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1853 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1855 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1856 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1857 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1858 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1859 #undef TARGET_UNWIND_WORD_MODE
1860 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1862 #undef TARGET_OFFLOAD_OPTIONS
1863 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1865 #undef TARGET_C_MODE_FOR_SUFFIX
1866 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1868 #undef TARGET_INVALID_BINARY_OP
1869 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1871 #undef TARGET_OPTAB_SUPPORTED_P
1872 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1874 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1875 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1878 /* Processor table. */
1879 struct rs6000_ptt
1881 const char *const name; /* Canonical processor name. */
1882 const enum processor_type processor; /* Processor type enum value. */
1883 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1886 static struct rs6000_ptt const processor_target_table[] =
1888 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1889 #include "rs6000-cpus.def"
1890 #undef RS6000_CPU
1893 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1894 name is invalid. */
1896 static int
1897 rs6000_cpu_name_lookup (const char *name)
1899 size_t i;
1901 if (name != NULL)
1903 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1904 if (! strcmp (name, processor_target_table[i].name))
1905 return (int)i;
1908 return -1;
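/* For example, rs6000_cpu_name_lookup ("power8") returns the index of
   the power8 entry in processor_target_table, while an unrecognized
   string such as "power99" (a made-up name here) returns -1.  */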
1912 /* Return number of consecutive hard regs needed starting at reg REGNO
1913 to hold something of mode MODE.
1914 This is ordinarily the length in words of a value of mode MODE
1915 but can be less for certain modes in special long registers.
1917 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1918 scalar instructions. The upper 32 bits are only available to the
1919 SIMD instructions.
1921 POWER and PowerPC GPRs hold 32 bits worth;
1922    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
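/* As a worked example of the ceiling division at the end of this
   function: a DFmode value (8 bytes) held in 32-bit GPRs
   (reg_size == 4) needs (8 + 4 - 1) / 4 == 2 consecutive registers,
   while the same value in a 64-bit FPR (reg_size == 8) needs one.  */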
1924 static int
1925 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1927 unsigned HOST_WIDE_INT reg_size;
1929 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1930 128-bit floating point that can go in vector registers, which has VSX
1931 memory addressing. */
1932 if (FP_REGNO_P (regno))
1933 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1934 ? UNITS_PER_VSX_WORD
1935 : UNITS_PER_FP_WORD);
1937 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1938 reg_size = UNITS_PER_SPE_WORD;
1940 else if (ALTIVEC_REGNO_P (regno))
1941 reg_size = UNITS_PER_ALTIVEC_WORD;
1943 /* The value returned for SCmode in the E500 double case is 2 for
1944 ABI compatibility; storing an SCmode value in a single register
1945 would require function_arg and rs6000_spe_function_arg to handle
1946 SCmode so as to pass the value correctly in a pair of
1947 registers. */
1948 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1949 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1950 reg_size = UNITS_PER_FP_WORD;
1952 else
1953 reg_size = UNITS_PER_WORD;
1955 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1958 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1959 MODE. */
1960 static int
1961 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1963 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1965 if (COMPLEX_MODE_P (mode))
1966 mode = GET_MODE_INNER (mode);
1968 /* PTImode can only go in GPRs.  Quad word memory operations require
1969 even/odd register combinations; we use PTImode where we need to deal with
1970 quad word memory operations.  Don't allow quad words in the argument or
1971 frame pointer registers, just registers 0..31.  */
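/* For example, a PTImode value may start in r4 (an even register,
   occupying r4/r5 in 64-bit mode) but not in r3, and it may never
   extend past r31.  */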
1972 if (mode == PTImode)
1973 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1974 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1975 && ((regno & 1) == 0));
1977 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1978 implementations. Don't allow an item to be split between a FP register
1979 and an Altivec register. Allow TImode in all VSX registers if the user
1980 asked for it. */
1981 if (TARGET_VSX && VSX_REGNO_P (regno)
1982 && (VECTOR_MEM_VSX_P (mode)
1983 || FLOAT128_VECTOR_P (mode)
1984 || reg_addr[mode].scalar_in_vmx_p
1985 || (TARGET_VSX_TIMODE && mode == TImode)
1986 || (TARGET_VADDUQM && mode == V1TImode)))
1988 if (FP_REGNO_P (regno))
1989 return FP_REGNO_P (last_regno);
1991 if (ALTIVEC_REGNO_P (regno))
1993 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1994 return 0;
1996 return ALTIVEC_REGNO_P (last_regno);
2000 /* The GPRs can hold any mode, but values bigger than one register
2001 cannot go past R31. */
2002 if (INT_REGNO_P (regno))
2003 return INT_REGNO_P (last_regno);
2005 /* The float registers (except for VSX vector modes) can only hold floating
2006 modes and DImode. */
2007 if (FP_REGNO_P (regno))
2009 if (FLOAT128_VECTOR_P (mode))
2010 return false;
2012 if (SCALAR_FLOAT_MODE_P (mode)
2013 && (mode != TDmode || (regno % 2) == 0)
2014 && FP_REGNO_P (last_regno))
2015 return 1;
2017 if (GET_MODE_CLASS (mode) == MODE_INT)
2019 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2020 return 1;
2022 if (TARGET_VSX_SMALL_INTEGER)
2024 if (mode == SImode)
2025 return 1;
2027 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2028 return 1;
2032 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2033 && PAIRED_VECTOR_MODE (mode))
2034 return 1;
2036 return 0;
2039 /* The CR register can only hold CC modes. */
2040 if (CR_REGNO_P (regno))
2041 return GET_MODE_CLASS (mode) == MODE_CC;
2043 if (CA_REGNO_P (regno))
2044 return mode == Pmode || mode == SImode;
2046 /* AltiVec modes can go only in AltiVec registers.  */
2047 if (ALTIVEC_REGNO_P (regno))
2048 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2049 || mode == V1TImode);
2051 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2052 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2053 return 1;
2055 /* We cannot put non-VSX TImode or PTImode anywhere except general
2056 registers, and the value must fit within the register set.  */
2058 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2061 /* Print interesting facts about registers. */
2062 static void
2063 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2065 int r, m;
2067 for (r = first_regno; r <= last_regno; ++r)
2069 const char *comma = "";
2070 int len;
2072 if (first_regno == last_regno)
2073 fprintf (stderr, "%s:\t", reg_name);
2074 else
2075 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2077 len = 8;
2078 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2079 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2081 if (len > 70)
2083 fprintf (stderr, ",\n\t");
2084 len = 8;
2085 comma = "";
2088 if (rs6000_hard_regno_nregs[m][r] > 1)
2089 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2090 rs6000_hard_regno_nregs[m][r]);
2091 else
2092 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2094 comma = ", ";
2097 if (call_used_regs[r])
2099 if (len > 70)
2101 fprintf (stderr, ",\n\t");
2102 len = 8;
2103 comma = "";
2106 len += fprintf (stderr, "%s%s", comma, "call-used");
2107 comma = ", ";
2110 if (fixed_regs[r])
2112 if (len > 70)
2114 fprintf (stderr, ",\n\t");
2115 len = 8;
2116 comma = "";
2119 len += fprintf (stderr, "%s%s", comma, "fixed");
2120 comma = ", ";
2123 if (len > 70)
2125 fprintf (stderr, ",\n\t");
2126 comma = "";
2129 len += fprintf (stderr, "%sreg-class = %s", comma,
2130 reg_class_names[(int)rs6000_regno_regclass[r]]);
2131 comma = ", ";
2133 if (len > 70)
2135 fprintf (stderr, ",\n\t");
2136 comma = "";
2139 fprintf (stderr, "%sregno = %d\n", comma, r);
2143 static const char *
2144 rs6000_debug_vector_unit (enum rs6000_vector v)
2146 const char *ret;
2148 switch (v)
2150 case VECTOR_NONE: ret = "none"; break;
2151 case VECTOR_ALTIVEC: ret = "altivec"; break;
2152 case VECTOR_VSX: ret = "vsx"; break;
2153 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2154 case VECTOR_PAIRED: ret = "paired"; break;
2155 case VECTOR_SPE: ret = "spe"; break;
2156 case VECTOR_OTHER: ret = "other"; break;
2157 default: ret = "unknown"; break;
2160 return ret;
2163 /* Inner function printing just the address mask for a particular reload
2164 register class. */
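/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set prints as "vio" when KEEP_SPACES is false, or
   as "v io   " with the blank positions preserved.  */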
2165 DEBUG_FUNCTION char *
2166 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2168 static char ret[8];
2169 char *p = ret;
2171 if ((mask & RELOAD_REG_VALID) != 0)
2172 *p++ = 'v';
2173 else if (keep_spaces)
2174 *p++ = ' ';
2176 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2177 *p++ = 'm';
2178 else if (keep_spaces)
2179 *p++ = ' ';
2181 if ((mask & RELOAD_REG_INDEXED) != 0)
2182 *p++ = 'i';
2183 else if (keep_spaces)
2184 *p++ = ' ';
2186 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2187 *p++ = 'O';
2188 else if ((mask & RELOAD_REG_OFFSET) != 0)
2189 *p++ = 'o';
2190 else if (keep_spaces)
2191 *p++ = ' ';
2193 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2194 *p++ = '+';
2195 else if (keep_spaces)
2196 *p++ = ' ';
2198 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2199 *p++ = '+';
2200 else if (keep_spaces)
2201 *p++ = ' ';
2203 if ((mask & RELOAD_REG_AND_M16) != 0)
2204 *p++ = '&';
2205 else if (keep_spaces)
2206 *p++ = ' ';
2208 *p = '\0';
2210 return ret;
2213 /* Print the address masks in a human readable fashion.  */
2214 DEBUG_FUNCTION void
2215 rs6000_debug_print_mode (ssize_t m)
2217 ssize_t rc;
2218 int spaces = 0;
2219 bool fuse_extra_p;
2221 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2222 for (rc = 0; rc < N_RELOAD_REG; rc++)
2223 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2224 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2226 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2227 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2228 fprintf (stderr, " Reload=%c%c",
2229 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2230 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2231 else
2232 spaces += sizeof (" Reload=sl") - 1;
2234 if (reg_addr[m].scalar_in_vmx_p)
2236 fprintf (stderr, "%*s Upper=y", spaces, "");
2237 spaces = 0;
2239 else
2240 spaces += sizeof (" Upper=y") - 1;
2242 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2243 || reg_addr[m].fused_toc);
2244 if (!fuse_extra_p)
2246 for (rc = 0; rc < N_RELOAD_REG; rc++)
2248 if (rc != RELOAD_REG_ANY)
2250 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2252 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2253 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2254 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2256 fuse_extra_p = true;
2257 break;
2263 if (fuse_extra_p)
2265 fprintf (stderr, "%*s Fuse:", spaces, "");
2266 spaces = 0;
2268 for (rc = 0; rc < N_RELOAD_REG; rc++)
2270 if (rc != RELOAD_REG_ANY)
2272 char load, store;
2274 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2275 load = 'l';
2276 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2277 load = 'L';
2278 else
2279 load = '-';
2281 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2282 store = 's';
2283 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2284 store = 'S';
2285 else
2286 store = '-';
2288 if (load == '-' && store == '-')
2289 spaces += 5;
2290 else
2292 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2293 reload_reg_map[rc].name[0], load, store);
2294 spaces = 0;
2299 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2301 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2302 spaces = 0;
2304 else
2305 spaces += sizeof (" P8gpr") - 1;
2307 if (reg_addr[m].fused_toc)
2309 fprintf (stderr, "%*sToc", (spaces + 1), "");
2310 spaces = 0;
2312 else
2313 spaces += sizeof (" Toc") - 1;
2315 else
2316 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2318 if (rs6000_vector_unit[m] != VECTOR_NONE
2319 || rs6000_vector_mem[m] != VECTOR_NONE)
2321 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2322 spaces, "",
2323 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2324 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2327 fputs ("\n", stderr);
2330 #define DEBUG_FMT_ID "%-32s= "
2331 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2332 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2333 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
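/* With the 32-column DEBUG_FMT_ID prefix, a call such as
   fprintf (stderr, DEBUG_FMT_S, "abi", "aix") prints "abi" padded to
   32 columns, followed by "= aix" and a newline.  */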
2335 /* Print various interesting information with -mdebug=reg. */
2336 static void
2337 rs6000_debug_reg_global (void)
2339 static const char *const tf[2] = { "false", "true" };
2340 const char *nl = (const char *)0;
2341 int m;
2342 size_t m1, m2, v;
2343 char costly_num[20];
2344 char nop_num[20];
2345 char flags_buffer[40];
2346 const char *costly_str;
2347 const char *nop_str;
2348 const char *trace_str;
2349 const char *abi_str;
2350 const char *cmodel_str;
2351 struct cl_target_option cl_opts;
2353 /* Modes we want tieable information on. */
2354 static const machine_mode print_tieable_modes[] = {
2355 QImode,
2356 HImode,
2357 SImode,
2358 DImode,
2359 TImode,
2360 PTImode,
2361 SFmode,
2362 DFmode,
2363 TFmode,
2364 IFmode,
2365 KFmode,
2366 SDmode,
2367 DDmode,
2368 TDmode,
2369 V8QImode,
2370 V4HImode,
2371 V2SImode,
2372 V16QImode,
2373 V8HImode,
2374 V4SImode,
2375 V2DImode,
2376 V1TImode,
2377 V32QImode,
2378 V16HImode,
2379 V8SImode,
2380 V4DImode,
2381 V2TImode,
2382 V2SFmode,
2383 V4SFmode,
2384 V2DFmode,
2385 V8SFmode,
2386 V4DFmode,
2387 CCmode,
2388 CCUNSmode,
2389 CCEQmode,
2392 /* Virtual regs we are interested in. */
2393 static const struct {
2394 int regno; /* register number. */
2395 const char *name; /* register name. */
2396 } virtual_regs[] = {
2397 { STACK_POINTER_REGNUM, "stack pointer:" },
2398 { TOC_REGNUM, "toc: " },
2399 { STATIC_CHAIN_REGNUM, "static chain: " },
2400 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2401 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2402 { ARG_POINTER_REGNUM, "arg pointer: " },
2403 { FRAME_POINTER_REGNUM, "frame pointer:" },
2404 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2405 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2406 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2407 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2408 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2409 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2410 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2411 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2412 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2415 fputs ("\nHard register information:\n", stderr);
2416 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2417 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2418 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2419 LAST_ALTIVEC_REGNO,
2420 "vs");
2421 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2422 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2423 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2424 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2425 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2426 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2427 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2428 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2430 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2431 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2432 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2434 fprintf (stderr,
2435 "\n"
2436 "d reg_class = %s\n"
2437 "f reg_class = %s\n"
2438 "v reg_class = %s\n"
2439 "wa reg_class = %s\n"
2440 "wb reg_class = %s\n"
2441 "wd reg_class = %s\n"
2442 "we reg_class = %s\n"
2443 "wf reg_class = %s\n"
2444 "wg reg_class = %s\n"
2445 "wh reg_class = %s\n"
2446 "wi reg_class = %s\n"
2447 "wj reg_class = %s\n"
2448 "wk reg_class = %s\n"
2449 "wl reg_class = %s\n"
2450 "wm reg_class = %s\n"
2451 "wo reg_class = %s\n"
2452 "wp reg_class = %s\n"
2453 "wq reg_class = %s\n"
2454 "wr reg_class = %s\n"
2455 "ws reg_class = %s\n"
2456 "wt reg_class = %s\n"
2457 "wu reg_class = %s\n"
2458 "wv reg_class = %s\n"
2459 "ww reg_class = %s\n"
2460 "wx reg_class = %s\n"
2461 "wy reg_class = %s\n"
2462 "wz reg_class = %s\n"
2463 "wH reg_class = %s\n"
2464 "wI reg_class = %s\n"
2465 "wJ reg_class = %s\n"
2466 "wK reg_class = %s\n"
2467 "\n",
2468 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2469 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2470 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2471 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2472 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2473 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2474 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2475 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2476 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2478 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2479 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2480 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2481 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2482 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2483 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2484 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2485 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2486 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2487 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2488 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2489 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2490 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2491 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2492 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2493 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2494 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2495 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2496 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2497 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2498 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2500 nl = "\n";
2501 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2502 rs6000_debug_print_mode (m);
2504 fputs ("\n", stderr);
2506 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2508 machine_mode mode1 = print_tieable_modes[m1];
2509 bool first_time = true;
2511 nl = (const char *)0;
2512 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2514 machine_mode mode2 = print_tieable_modes[m2];
2515 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2517 if (first_time)
2519 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2520 nl = "\n";
2521 first_time = false;
2524 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2528 if (!first_time)
2529 fputs ("\n", stderr);
2532 if (nl)
2533 fputs (nl, stderr);
2535 if (rs6000_recip_control)
2537 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2539 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2540 if (rs6000_recip_bits[m])
2542 fprintf (stderr,
2543 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2544 GET_MODE_NAME (m),
2545 (RS6000_RECIP_AUTO_RE_P (m)
2546 ? "auto"
2547 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2548 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2549 ? "auto"
2550 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2553 fputs ("\n", stderr);
2556 if (rs6000_cpu_index >= 0)
2558 const char *name = processor_target_table[rs6000_cpu_index].name;
2559 HOST_WIDE_INT flags
2560 = processor_target_table[rs6000_cpu_index].target_enable;
2562 sprintf (flags_buffer, "-mcpu=%s flags", name);
2563 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2565 else
2566 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2568 if (rs6000_tune_index >= 0)
2570 const char *name = processor_target_table[rs6000_tune_index].name;
2571 HOST_WIDE_INT flags
2572 = processor_target_table[rs6000_tune_index].target_enable;
2574 sprintf (flags_buffer, "-mtune=%s flags", name);
2575 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2577 else
2578 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2580 cl_target_option_save (&cl_opts, &global_options);
2581 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2582 rs6000_isa_flags);
2584 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2585 rs6000_isa_flags_explicit);
2587 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2588 rs6000_builtin_mask);
2590 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2592 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2593 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2595 switch (rs6000_sched_costly_dep)
2597 case max_dep_latency:
2598 costly_str = "max_dep_latency";
2599 break;
2601 case no_dep_costly:
2602 costly_str = "no_dep_costly";
2603 break;
2605 case all_deps_costly:
2606 costly_str = "all_deps_costly";
2607 break;
2609 case true_store_to_load_dep_costly:
2610 costly_str = "true_store_to_load_dep_costly";
2611 break;
2613 case store_to_load_dep_costly:
2614 costly_str = "store_to_load_dep_costly";
2615 break;
2617 default:
2618 costly_str = costly_num;
2619 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2620 break;
2623 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2625 switch (rs6000_sched_insert_nops)
2627 case sched_finish_regroup_exact:
2628 nop_str = "sched_finish_regroup_exact";
2629 break;
2631 case sched_finish_pad_groups:
2632 nop_str = "sched_finish_pad_groups";
2633 break;
2635 case sched_finish_none:
2636 nop_str = "sched_finish_none";
2637 break;
2639 default:
2640 nop_str = nop_num;
2641 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2642 break;
2645 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2647 switch (rs6000_sdata)
2649 default:
2650 case SDATA_NONE:
2651 break;
2653 case SDATA_DATA:
2654 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2655 break;
2657 case SDATA_SYSV:
2658 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2659 break;
2661 case SDATA_EABI:
2662 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2663 break;
2667 switch (rs6000_traceback)
2669 case traceback_default: trace_str = "default"; break;
2670 case traceback_none: trace_str = "none"; break;
2671 case traceback_part: trace_str = "part"; break;
2672 case traceback_full: trace_str = "full"; break;
2673 default: trace_str = "unknown"; break;
2676 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2678 switch (rs6000_current_cmodel)
2680 case CMODEL_SMALL: cmodel_str = "small"; break;
2681 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2682 case CMODEL_LARGE: cmodel_str = "large"; break;
2683 default: cmodel_str = "unknown"; break;
2686 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2688 switch (rs6000_current_abi)
2690 case ABI_NONE: abi_str = "none"; break;
2691 case ABI_AIX: abi_str = "aix"; break;
2692 case ABI_ELFv2: abi_str = "ELFv2"; break;
2693 case ABI_V4: abi_str = "V4"; break;
2694 case ABI_DARWIN: abi_str = "darwin"; break;
2695 default: abi_str = "unknown"; break;
2698 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2700 if (rs6000_altivec_abi)
2701 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2703 if (rs6000_spe_abi)
2704 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2706 if (rs6000_darwin64_abi)
2707 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2709 if (rs6000_float_gprs)
2710 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2712 fprintf (stderr, DEBUG_FMT_S, "fprs",
2713 (TARGET_FPRS ? "true" : "false"));
2715 fprintf (stderr, DEBUG_FMT_S, "single_float",
2716 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2718 fprintf (stderr, DEBUG_FMT_S, "double_float",
2719 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2721 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2722 (TARGET_SOFT_FLOAT ? "true" : "false"));
2724 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2725 (TARGET_E500_SINGLE ? "true" : "false"));
2727 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2728 (TARGET_E500_DOUBLE ? "true" : "false"));
2730 if (TARGET_LINK_STACK)
2731 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2733 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2735 if (TARGET_P8_FUSION)
2737 char options[80];
2739 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2740 if (TARGET_TOC_FUSION)
2741 strcat (options, ", toc");
2743 if (TARGET_P8_FUSION_SIGN)
2744 strcat (options, ", sign");
2746 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2749 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2750 TARGET_SECURE_PLT ? "secure" : "bss");
2751 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2752 aix_struct_return ? "aix" : "sysv");
2753 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2754 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2755 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2756 tf[!!rs6000_align_branch_targets]);
2757 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2758 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2759 rs6000_long_double_type_size);
2760 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2761 (int)rs6000_sched_restricted_insns_priority);
2762 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2763 (int)END_BUILTINS);
2764 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2765 (int)RS6000_BUILTIN_COUNT);
2767 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2768 (int)TARGET_FLOAT128_ENABLE_TYPE);
2770 if (TARGET_VSX)
2771 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2772 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2774 if (TARGET_DIRECT_MOVE_128)
2775 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2776 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2780 /* Update the addr mask bits in reg_addr to help secondary reload and the
2781 legitimate address support figure out the appropriate addressing to
2782 use.  */
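/* For instance, a scalar mode that fits in a single FPR and supports
   D-form addressing ends up with RELOAD_REG_VALID, RELOAD_REG_INDEXED
   and RELOAD_REG_OFFSET in its FPR entry, per the logic below.  */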
2784 static void
2785 rs6000_setup_reg_addr_masks (void)
2787 ssize_t rc, reg, m, nregs;
2788 addr_mask_type any_addr_mask, addr_mask;
2790 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2792 machine_mode m2 = (machine_mode) m;
2793 bool complex_p = false;
2794 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2795 size_t msize;
2797 if (COMPLEX_MODE_P (m2))
2799 complex_p = true;
2800 m2 = GET_MODE_INNER (m2);
2803 msize = GET_MODE_SIZE (m2);
2805 /* SDmode is special in that we want to access it only via REG+REG
2806 addressing on power7 and above, since we want to use the LFIWZX and
2807 STFIWX instructions to load and store it.  */
2808 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2810 any_addr_mask = 0;
2811 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2813 addr_mask = 0;
2814 reg = reload_reg_map[rc].reg;
2816 /* Can mode values go in the GPR/FPR/Altivec registers? */
2817 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2819 bool small_int_vsx_p = (small_int_p
2820 && (rc == RELOAD_REG_FPR
2821 || rc == RELOAD_REG_VMX));
2823 nregs = rs6000_hard_regno_nregs[m][reg];
2824 addr_mask |= RELOAD_REG_VALID;
2826 /* Indicate if the mode takes more than 1 physical register. If
2827 it takes a single register, indicate it can do REG+REG
2828 addressing. Small integers in VSX registers can only do
2829 REG+REG addressing. */
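/* E.g., an SImode value living in a VSX register is loaded with the
   indexed-form lxsiwzx instruction, so it gets RELOAD_REG_INDEXED
   rather than RELOAD_REG_MULTIPLE here.  */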
2830 if (small_int_vsx_p)
2831 addr_mask |= RELOAD_REG_INDEXED;
2832 else if (nregs > 1 || m == BLKmode || complex_p)
2833 addr_mask |= RELOAD_REG_MULTIPLE;
2834 else
2835 addr_mask |= RELOAD_REG_INDEXED;
2837 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2838 addressing. Restrict addressing on SPE for 64-bit types
2839 because of the SUBREG hackery used to address 64-bit floats in
2840 '32-bit' GPRs. If we allow scalars into Altivec registers,
2841 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
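/* For example, "lfdu f1,8(r3)" is an update-form load that also bumps
   r3 by 8; such update forms exist only for GPR and FPR accesses,
   which is why the test below is limited to those reload classes.  */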
2843 if (TARGET_UPDATE
2844 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2845 && msize <= 8
2846 && !VECTOR_MODE_P (m2)
2847 && !FLOAT128_VECTOR_P (m2)
2848 && !complex_p
2849 && !small_int_vsx_p
2850 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2851 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2852 && !(TARGET_E500_DOUBLE && msize == 8))
2854 addr_mask |= RELOAD_REG_PRE_INCDEC;
2856 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2857 we don't allow PRE_MODIFY for some multi-register
2858 operations. */
2859 switch (m)
2861 default:
2862 addr_mask |= RELOAD_REG_PRE_MODIFY;
2863 break;
2865 case DImode:
2866 if (TARGET_POWERPC64)
2867 addr_mask |= RELOAD_REG_PRE_MODIFY;
2868 break;
2870 case DFmode:
2871 case DDmode:
2872 if (TARGET_DF_INSN)
2873 addr_mask |= RELOAD_REG_PRE_MODIFY;
2874 break;
2879 /* GPR and FPR registers can do REG+OFFSET addressing, except
2880 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2881 for 64-bit scalars and 32-bit SFmode to Altivec registers.  */
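/* For instance, "lfd f1,16(r3)" is a REG+OFFSET (D-form) access, while
   "lfdx f1,r3,r4" is the equivalent REG+REG (X-form) access.  */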
2882 if ((addr_mask != 0) && !indexed_only_p
2883 && msize <= 8
2884 && (rc == RELOAD_REG_GPR
2885 || ((msize == 8 || m2 == SFmode)
2886 && (rc == RELOAD_REG_FPR
2887 || (rc == RELOAD_REG_VMX
2888 && TARGET_P9_DFORM_SCALAR)))))
2889 addr_mask |= RELOAD_REG_OFFSET;
2891 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2892 instructions are enabled.  The offset for 128-bit VSX registers is
2893 only 12 bits.  While GPRs can handle the full offset range, VSX
2894 registers can only handle the restricted range.  */
2895 else if ((addr_mask != 0) && !indexed_only_p
2896 && msize == 16 && TARGET_P9_DFORM_VECTOR
2897 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2898 || (m2 == TImode && TARGET_VSX_TIMODE)))
2900 addr_mask |= RELOAD_REG_OFFSET;
2901 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2902 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2905 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2906 addressing on 128-bit types. */
2907 if (rc == RELOAD_REG_VMX && msize == 16
2908 && (addr_mask & RELOAD_REG_VALID) != 0)
2909 addr_mask |= RELOAD_REG_AND_M16;
2911 reg_addr[m].addr_mask[rc] = addr_mask;
2912 any_addr_mask |= addr_mask;
2915 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2920 /* Initialize the various global tables that are based on register size. */
2921 static void
2922 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2924 ssize_t r, m, c;
2925 int align64;
2926 int align32;
2928 /* Precalculate REGNO_REG_CLASS. */
2929 rs6000_regno_regclass[0] = GENERAL_REGS;
2930 for (r = 1; r < 32; ++r)
2931 rs6000_regno_regclass[r] = BASE_REGS;
2933 for (r = 32; r < 64; ++r)
2934 rs6000_regno_regclass[r] = FLOAT_REGS;
2936 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2937 rs6000_regno_regclass[r] = NO_REGS;
2939 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2940 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2942 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2943 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2944 rs6000_regno_regclass[r] = CR_REGS;
2946 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2947 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2948 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2949 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2950 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2951 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2952 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2953 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2954 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2955 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2956 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2957 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2959 /* Precalculate register class to simpler reload register class. We don't
2960 need all of the register classes that are combinations of different
2961 classes, just the simple ones that have constraint letters. */
2962 for (c = 0; c < N_REG_CLASSES; c++)
2963 reg_class_to_reg_type[c] = NO_REG_TYPE;
2965 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2966 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2967 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2968 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2969 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2970 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2971 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2972 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2973 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2974 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2975 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2976 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2978 if (TARGET_VSX)
2980 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2981 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2983 else
2985 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2986 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2989 /* Precalculate the valid memory formats as well as the vector information;
2990 this must be set up before the rs6000_hard_regno_nregs_internal calls
2991 below.  */
2992 gcc_assert ((int)VECTOR_NONE == 0);
2993 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2994 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2996 gcc_assert ((int)CODE_FOR_nothing == 0);
2997 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2999 gcc_assert ((int)NO_REGS == 0);
3000 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3002 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
3003 controls whether the compiler believes it can use that native alignment or must still assume 128-bit alignment.  */
3004 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3006 align64 = 64;
3007 align32 = 32;
3009 else
3011 align64 = 128;
3012 align32 = 128;
3015 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3016 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3017 if (TARGET_FLOAT128_TYPE)
3019 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3020 rs6000_vector_align[KFmode] = 128;
3022 if (FLOAT128_IEEE_P (TFmode))
3024 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3025 rs6000_vector_align[TFmode] = 128;
3029 /* V2DF mode, VSX only. */
3030 if (TARGET_VSX)
3032 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3033 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3034 rs6000_vector_align[V2DFmode] = align64;
3037 /* V4SF mode, either VSX or Altivec. */
3038 if (TARGET_VSX)
3040 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3041 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3042 rs6000_vector_align[V4SFmode] = align32;
3044 else if (TARGET_ALTIVEC)
3046 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3047 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3048 rs6000_vector_align[V4SFmode] = align32;
3051 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3052 and stores. */
3053 if (TARGET_ALTIVEC)
3055 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3056 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3057 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3058 rs6000_vector_align[V4SImode] = align32;
3059 rs6000_vector_align[V8HImode] = align32;
3060 rs6000_vector_align[V16QImode] = align32;
3062 if (TARGET_VSX)
3064 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3065 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3066 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3068 else
3070 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3071 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3072 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3076 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode; allow it
3077 under VSX to do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
3078 if (TARGET_VSX)
3080 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3081 rs6000_vector_unit[V2DImode]
3082 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3083 rs6000_vector_align[V2DImode] = align64;
3085 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3086 rs6000_vector_unit[V1TImode]
3087 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3088 rs6000_vector_align[V1TImode] = 128;
3091 /* DFmode, see if we want to use the VSX unit. Memory is handled
3092 differently, so don't set rs6000_vector_mem. */
3093 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3095 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3096 rs6000_vector_align[DFmode] = 64;
3099 /* SFmode, see if we want to use the VSX unit. */
3100 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3102 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3103 rs6000_vector_align[SFmode] = 32;
3106 /* Allow TImode in VSX registers and set the VSX memory macros.  */
3107 if (TARGET_VSX && TARGET_VSX_TIMODE)
3109 rs6000_vector_mem[TImode] = VECTOR_VSX;
3110 rs6000_vector_align[TImode] = align64;
3113 /* TODO add SPE and paired floating point vector support. */
3115 /* Register class constraints for the constraints that depend on compile
3116 switches. When the VSX code was added, different constraints were added
3117 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3118 of the VSX registers are used.  The register classes for scalar floating
3119 point types are set based on whether we allow that type into the upper
3120 (Altivec) registers.  GCC has register classes to target the Altivec
3121 registers for load/store operations, so it can select a VSX memory
3122 operation instead of the traditional floating point operation.  The
3123 constraints are:
3125 d - Register class to use with traditional DFmode instructions.
3126 f - Register class to use with traditional SFmode instructions.
3127 v - Altivec register.
3128 wa - Any VSX register.
3129 wc - Reserved to represent individual CR bits (used in LLVM).
3130 wd - Preferred register class for V2DFmode.
3131 wf - Preferred register class for V4SFmode.
3132 wg - Float register for power6x move insns.
3133 wh - FP register for direct move instructions.
3134 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3135 wj - FP or VSX register to hold 64-bit integers for direct moves.
3136 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3137 wl - Float register if we can do 32-bit signed int loads.
3138 wm - VSX register for ISA 2.07 direct move operations.
3139 wn - always NO_REGS.
3140 wr - GPR if 64-bit mode is permitted.
3141 ws - Register class to do ISA 2.06 DF operations.
3142 wt - VSX register for TImode in VSX registers.
3143 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3144 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3145 ww - Register class to do SF conversions in with VSX operations.
3146 wx - Float register if we can do 32-bit int stores.
3147 wy - Register class to do ISA 2.07 SF operations.
3148 wz - Float register if we can do 32-bit unsigned int loads.
3149 wH - Altivec register if SImode is allowed in VSX registers.
3150 wI - VSX register if SImode is allowed in VSX registers.
3151 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3152 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
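/* As a user-level illustration (a sketch, not part of GCC itself),
   these letters appear as operand constraints in inline asm.  Assuming
   compilation with -mvsx, "wa" accepts any VSX register and the "%x"
   output modifier prints the full VSX register number:

     vector double
     vsx_add (vector double a, vector double b)
     {
       vector double r;
       __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
       return r;
     }
*/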
3154 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3155 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3157 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3158 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3160 if (TARGET_VSX)
3162 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3163 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3164 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3166 if (TARGET_VSX_TIMODE)
3167 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3169 if (TARGET_UPPER_REGS_DF) /* DFmode */
3171 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3172 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3174 else
3175 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3177 if (TARGET_UPPER_REGS_DF) /* DImode */
3178 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3179 else
3180 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3183 /* Add conditional constraints based on various options, to allow us to
3184 collapse multiple insn patterns. */
3185 if (TARGET_ALTIVEC)
3186 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3188 if (TARGET_MFPGPR) /* DFmode */
3189 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3191 if (TARGET_LFIWAX)
3192 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3194 if (TARGET_DIRECT_MOVE)
3196 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3197 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3198 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3199 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3200 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3201 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3204 if (TARGET_POWERPC64)
3205 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3207 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3209 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3210 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3211 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3213 else if (TARGET_P8_VECTOR)
3215 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3216 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3218 else if (TARGET_VSX)
3219 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3221 if (TARGET_STFIWX)
3222 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3224 if (TARGET_LFIWZX)
3225 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3227 if (TARGET_FLOAT128_TYPE)
3229 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3230 if (FLOAT128_IEEE_P (TFmode))
3231 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3234 /* Support for new D-form instructions. */
3235 if (TARGET_P9_DFORM_SCALAR)
3236 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3238 /* Support for ISA 3.0 (power9) vectors. */
3239 if (TARGET_P9_VECTOR)
3240 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3242 /* Support for new direct moves (ISA 3.0 + 64bit). */
3243 if (TARGET_DIRECT_MOVE_128)
3244 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3246 /* Support small integers in VSX registers. */
3247 if (TARGET_VSX_SMALL_INTEGER)
3249 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3250 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3251 if (TARGET_P9_VECTOR)
3253 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3254 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3258 /* Set up the reload helper and direct move functions. */
3259 if (TARGET_VSX || TARGET_ALTIVEC)
3261 if (TARGET_64BIT)
3263 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3264 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3265 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3266 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3267 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3268 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3269 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3270 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3271 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3272 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3273 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3274 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3275 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3276 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3277 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3278 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3279 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3280 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3281 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3282 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3284 if (FLOAT128_VECTOR_P (KFmode))
3286 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3287 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3290 if (FLOAT128_VECTOR_P (TFmode))
3292 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3293 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3296 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3297 available. */
3298 if (TARGET_NO_SDMODE_STACK)
3300 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3301 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3304 if (TARGET_VSX_TIMODE)
3306 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3307 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3310 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3312 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3313 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3314 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3315 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3316 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3317 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3318 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3319 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3320 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3322 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3323 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3324 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3325 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3326 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3327 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3328 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3329 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3330 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3332 if (FLOAT128_VECTOR_P (KFmode))
3334 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3335 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3338 if (FLOAT128_VECTOR_P (TFmode))
3340 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3341 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3345 else
3347 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3348 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3349 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3350 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3351 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3352 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3353 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3354 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3355 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3356 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3357 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3358 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3359 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3360 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3361 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3362 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3363 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3364 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3365 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3366 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3368 if (FLOAT128_VECTOR_P (KFmode))
3370 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3371 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3374 if (FLOAT128_IEEE_P (TFmode))
3376 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3377 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3380 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3381 available. */
3382 if (TARGET_NO_SDMODE_STACK)
3384 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3385 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3388 if (TARGET_VSX_TIMODE)
3390 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3391 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3394 if (TARGET_DIRECT_MOVE)
3396 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3397 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3398 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3402 if (TARGET_UPPER_REGS_DF)
3403 reg_addr[DFmode].scalar_in_vmx_p = true;
3405 if (TARGET_UPPER_REGS_DI)
3406 reg_addr[DImode].scalar_in_vmx_p = true;
3408 if (TARGET_UPPER_REGS_SF)
3409 reg_addr[SFmode].scalar_in_vmx_p = true;
3411 if (TARGET_VSX_SMALL_INTEGER)
3413 reg_addr[SImode].scalar_in_vmx_p = true;
3414 if (TARGET_P9_VECTOR)
3416 reg_addr[HImode].scalar_in_vmx_p = true;
3417 reg_addr[QImode].scalar_in_vmx_p = true;
3422 /* Setup the fusion operations. */
3423 if (TARGET_P8_FUSION)
3425 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3426 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3427 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3428 if (TARGET_64BIT)
3429 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3432 if (TARGET_P9_FUSION)
3434 struct fuse_insns {
3435 enum machine_mode mode; /* mode of the fused type. */
3436 enum machine_mode pmode; /* pointer mode. */
3437 enum rs6000_reload_reg_type rtype; /* register type. */
3438 enum insn_code load; /* load insn. */
3439 enum insn_code store; /* store insn. */
3442 static const struct fuse_insns addis_insns[] = {
3443 { SFmode, DImode, RELOAD_REG_FPR,
3444 CODE_FOR_fusion_fpr_di_sf_load,
3445 CODE_FOR_fusion_fpr_di_sf_store },
3447 { SFmode, SImode, RELOAD_REG_FPR,
3448 CODE_FOR_fusion_fpr_si_sf_load,
3449 CODE_FOR_fusion_fpr_si_sf_store },
3451 { DFmode, DImode, RELOAD_REG_FPR,
3452 CODE_FOR_fusion_fpr_di_df_load,
3453 CODE_FOR_fusion_fpr_di_df_store },
3455 { DFmode, SImode, RELOAD_REG_FPR,
3456 CODE_FOR_fusion_fpr_si_df_load,
3457 CODE_FOR_fusion_fpr_si_df_store },
3459 { DImode, DImode, RELOAD_REG_FPR,
3460 CODE_FOR_fusion_fpr_di_di_load,
3461 CODE_FOR_fusion_fpr_di_di_store },
3463 { DImode, SImode, RELOAD_REG_FPR,
3464 CODE_FOR_fusion_fpr_si_di_load,
3465 CODE_FOR_fusion_fpr_si_di_store },
3467 { QImode, DImode, RELOAD_REG_GPR,
3468 CODE_FOR_fusion_gpr_di_qi_load,
3469 CODE_FOR_fusion_gpr_di_qi_store },
3471 { QImode, SImode, RELOAD_REG_GPR,
3472 CODE_FOR_fusion_gpr_si_qi_load,
3473 CODE_FOR_fusion_gpr_si_qi_store },
3475 { HImode, DImode, RELOAD_REG_GPR,
3476 CODE_FOR_fusion_gpr_di_hi_load,
3477 CODE_FOR_fusion_gpr_di_hi_store },
3479 { HImode, SImode, RELOAD_REG_GPR,
3480 CODE_FOR_fusion_gpr_si_hi_load,
3481 CODE_FOR_fusion_gpr_si_hi_store },
3483 { SImode, DImode, RELOAD_REG_GPR,
3484 CODE_FOR_fusion_gpr_di_si_load,
3485 CODE_FOR_fusion_gpr_di_si_store },
3487 { SImode, SImode, RELOAD_REG_GPR,
3488 CODE_FOR_fusion_gpr_si_si_load,
3489 CODE_FOR_fusion_gpr_si_si_store },
3491 { SFmode, DImode, RELOAD_REG_GPR,
3492 CODE_FOR_fusion_gpr_di_sf_load,
3493 CODE_FOR_fusion_gpr_di_sf_store },
3495 { SFmode, SImode, RELOAD_REG_GPR,
3496 CODE_FOR_fusion_gpr_si_sf_load,
3497 CODE_FOR_fusion_gpr_si_sf_store },
3499 { DImode, DImode, RELOAD_REG_GPR,
3500 CODE_FOR_fusion_gpr_di_di_load,
3501 CODE_FOR_fusion_gpr_di_di_store },
3503 { DFmode, DImode, RELOAD_REG_GPR,
3504 CODE_FOR_fusion_gpr_di_df_load,
3505 CODE_FOR_fusion_gpr_di_df_store },
3508 enum machine_mode cur_pmode = Pmode;
3509 size_t i;
3511 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3513 enum machine_mode xmode = addis_insns[i].mode;
3514 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3516 if (addis_insns[i].pmode != cur_pmode)
3517 continue;
3519 if (rtype == RELOAD_REG_FPR
3520 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3521 continue;
3523 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3524 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3528 /* Note which types we support fusing TOC setup plus memory insn. We only do
3529 fused TOCs for medium/large code models. */
3530 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3531 && (TARGET_CMODEL != CMODEL_SMALL))
3533 reg_addr[QImode].fused_toc = true;
3534 reg_addr[HImode].fused_toc = true;
3535 reg_addr[SImode].fused_toc = true;
3536 reg_addr[DImode].fused_toc = true;
3537 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3539 if (TARGET_SINGLE_FLOAT)
3540 reg_addr[SFmode].fused_toc = true;
3541 if (TARGET_DOUBLE_FLOAT)
3542 reg_addr[DFmode].fused_toc = true;
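/* As an illustrative sketch (assembly assumed, medium code model): a fused
   TOC reference keeps the TOC-pointer addis adjacent to the dependent
   memory access, e.g.

	addis 9,2,var@toc@ha
	ld 9,var@toc@l(9)

   so that power8 and later processors can fuse the pair at dispatch.  */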
3546 /* Precalculate HARD_REGNO_NREGS. */
3547 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3548 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3549 rs6000_hard_regno_nregs[m][r]
3550 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3552 /* Precalculate HARD_REGNO_MODE_OK. */
3553 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3554 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3555 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3556 rs6000_hard_regno_mode_ok_p[m][r] = true;
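/* A minimal sketch (assumed, mirroring the definition in rs6000.h): the
   precomputed table lets the hot HARD_REGNO_MODE_OK macro become a simple
   array lookup instead of a function call:

	#define HARD_REGNO_MODE_OK(REGNO, MODE) \
	  rs6000_hard_regno_mode_ok_p[(int) (MODE)][REGNO]
 */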
3558 /* Precalculate CLASS_MAX_NREGS sizes. */
3559 for (c = 0; c < LIM_REG_CLASSES; ++c)
3561 int reg_size;
3563 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3564 reg_size = UNITS_PER_VSX_WORD;
3566 else if (c == ALTIVEC_REGS)
3567 reg_size = UNITS_PER_ALTIVEC_WORD;
3569 else if (c == FLOAT_REGS)
3570 reg_size = UNITS_PER_FP_WORD;
3572 else
3573 reg_size = UNITS_PER_WORD;
3575 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3577 machine_mode m2 = (machine_mode)m;
3578 int reg_size2 = reg_size;
3580 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3581 in VSX. */
3582 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3583 reg_size2 = UNITS_PER_FP_WORD;
3585 rs6000_class_max_nregs[m][c]
3586 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
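/* Worked example (values assumed for illustration): IBM extended double
   is 16 bytes and FLOAT128_2REG_P, so in a VSX class reg_size2 falls
   back to UNITS_PER_FP_WORD (8) and the mode needs
   (16 + 8 - 1) / 8 = 2 registers, while 16-byte V2DFmode keeps
   reg_size2 == UNITS_PER_VSX_WORD (16) and needs (16 + 16 - 1) / 16 = 1.  */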
3590 if (TARGET_E500_DOUBLE)
3591 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3593 /* Calculate which modes to automatically generate code to use the
3594 reciprocal divide and square root instructions. In the future, possibly
3595 automatically generate the instructions even if the user did not specify
3596 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3597 not accurate enough. */
3598 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3599 if (TARGET_FRES)
3600 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3601 if (TARGET_FRE)
3602 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3603 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3604 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3605 if (VECTOR_UNIT_VSX_P (V2DFmode))
3606 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3608 if (TARGET_FRSQRTES)
3609 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3610 if (TARGET_FRSQRTE)
3611 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3612 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3613 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3614 if (VECTOR_UNIT_VSX_P (V2DFmode))
3615 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3617 if (rs6000_recip_control)
3619 if (!flag_finite_math_only)
3620 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3621 if (flag_trapping_math)
3622 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3623 if (!flag_reciprocal_math)
3624 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3625 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3627 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3628 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3629 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3631 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3632 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3633 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3635 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3636 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3637 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3639 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3640 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3641 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3643 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3644 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3645 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3647 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3648 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3649 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3651 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3652 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3653 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3655 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3656 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3657 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
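/* A sketch of the refinement these bits enable (simplified; the actual
   expansions are rs6000_emit_swdiv and rs6000_emit_swsqrt): starting from
   the hardware estimate e0 = fre(d), each Newton-Raphson step

	e1 = e0 * (2.0 - d * e0)

   roughly doubles the number of correct bits, after which x / d is
   computed as x * e1.  */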
3661 /* Update the addr mask bits in reg_addr to help secondary reload and the
3662 legitimate address support figure out the appropriate addressing to
3663 use. */
3664 rs6000_setup_reg_addr_masks ();
3666 if (global_init_p || TARGET_DEBUG_TARGET)
3668 if (TARGET_DEBUG_REG)
3669 rs6000_debug_reg_global ();
3671 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3672 fprintf (stderr,
3673 "SImode variable mult cost = %d\n"
3674 "SImode constant mult cost = %d\n"
3675 "SImode short constant mult cost = %d\n"
3676 "DImode multipliciation cost = %d\n"
3677 "SImode division cost = %d\n"
3678 "DImode division cost = %d\n"
3679 "Simple fp operation cost = %d\n"
3680 "DFmode multiplication cost = %d\n"
3681 "SFmode division cost = %d\n"
3682 "DFmode division cost = %d\n"
3683 "cache line size = %d\n"
3684 "l1 cache size = %d\n"
3685 "l2 cache size = %d\n"
3686 "simultaneous prefetches = %d\n"
3687 "\n",
3688 rs6000_cost->mulsi,
3689 rs6000_cost->mulsi_const,
3690 rs6000_cost->mulsi_const9,
3691 rs6000_cost->muldi,
3692 rs6000_cost->divsi,
3693 rs6000_cost->divdi,
3694 rs6000_cost->fp,
3695 rs6000_cost->dmul,
3696 rs6000_cost->sdiv,
3697 rs6000_cost->ddiv,
3698 rs6000_cost->cache_line_size,
3699 rs6000_cost->l1_cache_size,
3700 rs6000_cost->l2_cache_size,
3701 rs6000_cost->simultaneous_prefetches);
3705 #if TARGET_MACHO
3706 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3708 static void
3709 darwin_rs6000_override_options (void)
3711 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3712 off. */
3713 rs6000_altivec_abi = 1;
3714 TARGET_ALTIVEC_VRSAVE = 1;
3715 rs6000_current_abi = ABI_DARWIN;
3717 if (DEFAULT_ABI == ABI_DARWIN
3718 && TARGET_64BIT)
3719 darwin_one_byte_bool = 1;
3721 if (TARGET_64BIT && ! TARGET_POWERPC64)
3723 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3724 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3726 if (flag_mkernel)
3728 rs6000_default_long_calls = 1;
3729 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3732 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3733 Altivec. */
3734 if (!flag_mkernel && !flag_apple_kext
3735 && TARGET_64BIT
3736 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3737 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3739 /* Unless the user (not the configurer) has explicitly overridden
3740 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3741 G4 unless targeting the kernel. */
3742 if (!flag_mkernel
3743 && !flag_apple_kext
3744 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3745 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3746 && ! global_options_set.x_rs6000_cpu_index)
3748 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3751 #endif
3753 /* If not otherwise specified by a target, make 'long double' equivalent to
3754 'double'. */
3756 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3757 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3758 #endif
3760 /* Return the builtin mask of the various options in effect that could affect
3761 which builtins are available. In the past we used target_flags, but we've
3762 run out of bits, and some options like SPE and PAIRED are no longer in
3763 target_flags. */
3765 HOST_WIDE_INT
3766 rs6000_builtin_mask_calculate (void)
3768 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3769 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3770 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3771 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3772 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3773 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3774 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3775 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3776 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3777 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3778 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3779 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3780 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3781 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3782 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3783 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3784 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3785 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3786 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3787 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3788 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
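/* Usage sketch (assumed, simplified from the checks made when builtins are
   expanded; fnmask here is a hypothetical per-builtin requirement mask):
   a builtin is rejected when its required bits are missing from the mask
   computed above:

	HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
	if ((fnmask & ~mask) != 0)
	  error ("builtin function is not enabled for the current target");
 */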
3791 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3792 to clobber the XER[CA] bit because clobbering that bit without telling
3793 the compiler worked just fine with versions of GCC before GCC 5, and
3794 breaking a lot of older code in ways that are hard to track down is
3795 not such a great idea. */
3797 static rtx_insn *
3798 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3799 vec<const char *> &/*constraints*/,
3800 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3802 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3803 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3804 return NULL;
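/* Hypothetical user code (not part of this file) that the blanket clobber
   above protects: an asm producing a widened sum with addc/addze, which
   write and read XER[CA] without declaring it:

	__asm__ ("addc %0,%2,%3\n\taddze %1,%4"
		 : "=&r" (lo), "=r" (hi)
		 : "r" (a), "r" (b), "r" (c));
 */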
3807 /* Override command line options. Mostly we process the processor type and
3808 sometimes adjust other TARGET_ options. */
3810 static bool
3811 rs6000_option_override_internal (bool global_init_p)
3813 bool ret = true;
3814 bool have_cpu = false;
3816 /* The default cpu requested at configure time, if any. */
3817 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3819 HOST_WIDE_INT set_masks;
3820 int cpu_index;
3821 int tune_index;
3822 struct cl_target_option *main_target_opt
3823 = ((global_init_p || target_option_default_node == NULL)
3824 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3826 /* Print defaults. */
3827 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3828 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3830 /* Remember the explicit arguments. */
3831 if (global_init_p)
3832 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3834 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3835 library functions, so warn about it. The flag may be useful for
3836 performance studies from time to time though, so don't disable it
3837 entirely. */
3838 if (global_options_set.x_rs6000_alignment_flags
3839 && rs6000_alignment_flags == MASK_ALIGN_POWER
3840 && DEFAULT_ABI == ABI_DARWIN
3841 && TARGET_64BIT)
3842 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3843 " it is incompatible with the installed C and C++ libraries");
3845 /* Numerous experiments show that IRA-based loop pressure
3846 calculation works better for RTL loop invariant motion on targets
3847 with enough (>= 32) registers. It is an expensive optimization,
3848 so it is enabled only for peak performance. */
3849 if (optimize >= 3 && global_init_p
3850 && !global_options_set.x_flag_ira_loop_pressure)
3851 flag_ira_loop_pressure = 1;
3853 /* Set the pointer size. */
3854 if (TARGET_64BIT)
3856 rs6000_pmode = (int)DImode;
3857 rs6000_pointer_size = 64;
3859 else
3861 rs6000_pmode = (int)SImode;
3862 rs6000_pointer_size = 32;
3865 /* Some OSs don't support saving the high part of 64-bit registers on context
3866 switch. Other OSs don't support saving Altivec registers. On those OSs,
3867 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3868 if the user wants either, the user must explicitly specify them and we
3869 won't interfere with the user's specification. */
3871 set_masks = POWERPC_MASKS;
3872 #ifdef OS_MISSING_POWERPC64
3873 if (OS_MISSING_POWERPC64)
3874 set_masks &= ~OPTION_MASK_POWERPC64;
3875 #endif
3876 #ifdef OS_MISSING_ALTIVEC
3877 if (OS_MISSING_ALTIVEC)
3878 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3879 #endif
3881 /* Don't let the processor default override flags that were given explicitly. */
3882 set_masks &= ~rs6000_isa_flags_explicit;
3884 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3885 the cpu in a target attribute or pragma, but did not specify a tuning
3886 option, use the cpu for the tuning option rather than the option specified
3887 with -mtune on the command line. Process a '--with-cpu' configuration
3888 request as an implicit -mcpu. */
3889 if (rs6000_cpu_index >= 0)
3891 cpu_index = rs6000_cpu_index;
3892 have_cpu = true;
3894 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3896 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3897 have_cpu = true;
3899 else if (implicit_cpu)
3901 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3902 have_cpu = true;
3904 else
3906 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3907 const char *default_cpu = ((!TARGET_POWERPC64)
3908 ? "powerpc"
3909 : ((BYTES_BIG_ENDIAN)
3910 ? "powerpc64"
3911 : "powerpc64le"));
3913 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3914 have_cpu = false;
3917 gcc_assert (cpu_index >= 0);
3919 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3920 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3921 with those from the cpu, except for options that were explicitly set. If
3922 we don't have a cpu, do not override the target bits set in
3923 TARGET_DEFAULT. */
3924 if (have_cpu)
3926 rs6000_isa_flags &= ~set_masks;
3927 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3928 & set_masks);
3930 else
3932 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3933 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3934 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3935 to using rs6000_isa_flags, we need to do the initialization here.
3937 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3938 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3939 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3940 : processor_target_table[cpu_index].target_enable);
3941 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3944 if (rs6000_tune_index >= 0)
3945 tune_index = rs6000_tune_index;
3946 else if (have_cpu)
3947 rs6000_tune_index = tune_index = cpu_index;
3948 else
3950 size_t i;
3951 enum processor_type tune_proc
3952 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3954 tune_index = -1;
3955 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3956 if (processor_target_table[i].processor == tune_proc)
3958 rs6000_tune_index = tune_index = i;
3959 break;
3963 gcc_assert (tune_index >= 0);
3964 rs6000_cpu = processor_target_table[tune_index].processor;
3966 /* Pick defaults for SPE related control flags. Do this early to make sure
3967 that the TARGET_ macros are representative ASAP. */
3969 int spe_capable_cpu =
3970 (rs6000_cpu == PROCESSOR_PPC8540
3971 || rs6000_cpu == PROCESSOR_PPC8548);
3973 if (!global_options_set.x_rs6000_spe_abi)
3974 rs6000_spe_abi = spe_capable_cpu;
3976 if (!global_options_set.x_rs6000_spe)
3977 rs6000_spe = spe_capable_cpu;
3979 if (!global_options_set.x_rs6000_float_gprs)
3980 rs6000_float_gprs =
3981 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3982 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3983 : 0);
3986 if (global_options_set.x_rs6000_spe_abi
3987 && rs6000_spe_abi
3988 && !TARGET_SPE_ABI)
3989 error ("not configured for SPE ABI");
3991 if (global_options_set.x_rs6000_spe
3992 && rs6000_spe
3993 && !TARGET_SPE)
3994 error ("not configured for SPE instruction set");
3996 if (main_target_opt != NULL
3997 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3998 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3999 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4000 error ("target attribute or pragma changes SPE ABI");
4002 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4003 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4004 || rs6000_cpu == PROCESSOR_PPCE5500)
4006 if (TARGET_ALTIVEC)
4007 error ("AltiVec not supported in this target");
4008 if (TARGET_SPE)
4009 error ("SPE not supported in this target");
4011 if (rs6000_cpu == PROCESSOR_PPCE6500)
4013 if (TARGET_SPE)
4014 error ("SPE not supported in this target");
4017 /* Disable Cell microcode if we are optimizing for the Cell
4018 and not optimizing for size. */
4019 if (rs6000_gen_cell_microcode == -1)
4020 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4021 && !optimize_size);
4023 /* If we are optimizing big endian systems for space and it's OK to
4024 use instructions that would be microcoded on the Cell, use the
4025 load/store multiple and string instructions. */
4026 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4027 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4028 | OPTION_MASK_STRING);
4030 /* Don't allow -mmultiple or -mstring on little endian systems
4031 unless the cpu is a 750, because the hardware doesn't support the
4032 instructions used in little endian mode, and they cause an alignment
4033 trap. The 750 does not cause an alignment trap (except when the
4034 target address is unaligned). */
4036 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4038 if (TARGET_MULTIPLE)
4040 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4041 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4042 warning (0, "-mmultiple is not supported on little endian systems");
4045 if (TARGET_STRING)
4047 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4048 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4049 warning (0, "-mstring is not supported on little endian systems");
4053 /* If little-endian, default to -mstrict-align on older processors.
4054 Testing for htm matches power8 and later. */
4055 if (!BYTES_BIG_ENDIAN
4056 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4057 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4059 /* -maltivec={le,be} implies -maltivec. */
4060 if (rs6000_altivec_element_order != 0)
4061 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4063 /* Disallow -maltivec=le in big endian mode for now. This is not
4064 known to be useful for anyone. */
4065 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4067 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4068 rs6000_altivec_element_order = 0;
4071 /* Add some warnings for VSX. */
4072 if (TARGET_VSX)
4074 const char *msg = NULL;
4075 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4076 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4078 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4079 msg = N_("-mvsx requires hardware floating point");
4080 else
4082 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4083 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4086 else if (TARGET_PAIRED_FLOAT)
4087 msg = N_("-mvsx and -mpaired are incompatible");
4088 else if (TARGET_AVOID_XFORM > 0)
4089 msg = N_("-mvsx needs indexed addressing");
4090 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4091 & OPTION_MASK_ALTIVEC))
4093 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4094 msg = N_("-mvsx and -mno-altivec are incompatible");
4095 else
4096 msg = N_("-mno-altivec disables vsx");
4099 if (msg)
4101 warning (0, msg);
4102 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4103 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4107 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4108 the -mcpu setting to enable options that conflict. */
4109 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4110 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4111 | OPTION_MASK_ALTIVEC
4112 | OPTION_MASK_VSX)) != 0)
4113 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4114 | OPTION_MASK_DIRECT_MOVE)
4115 & ~rs6000_isa_flags_explicit);
4117 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4118 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4120 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4121 unless the user explicitly used the -mno-<option> to disable the code. */
4122 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4123 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4124 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4125 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4126 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4127 else if (TARGET_VSX)
4128 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4129 else if (TARGET_POPCNTD)
4130 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4131 else if (TARGET_DFP)
4132 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4133 else if (TARGET_CMPB)
4134 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4135 else if (TARGET_FPRND)
4136 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4137 else if (TARGET_POPCNTB)
4138 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4139 else if (TARGET_ALTIVEC)
4140 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4142 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4144 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4145 error ("-mcrypto requires -maltivec");
4146 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4149 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4151 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4152 error ("-mdirect-move requires -mvsx");
4153 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4156 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4158 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4159 error ("-mpower8-vector requires -maltivec");
4160 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4163 if (TARGET_P8_VECTOR && !TARGET_VSX)
4165 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4166 error ("-mpower8-vector requires -mvsx");
4167 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4170 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4172 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4173 error ("-mvsx-timode requires -mvsx");
4174 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4177 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4179 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4180 error ("-mhard-dfp requires -mhard-float");
4181 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4184 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4185 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4186 set the individual option. */
4187 if (TARGET_UPPER_REGS > 0)
4189 if (TARGET_VSX
4190 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4192 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4193 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4195 if (TARGET_VSX
4196 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4198 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4199 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4201 if (TARGET_P8_VECTOR
4202 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4204 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4205 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4208 else if (TARGET_UPPER_REGS == 0)
4210 if (TARGET_VSX
4211 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4213 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4214 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4216 if (TARGET_VSX
4217 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4219 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4220 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4222 if (TARGET_P8_VECTOR
4223 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4225 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4226 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4230 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4232 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4233 error ("-mupper-regs-df requires -mvsx");
4234 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4237 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4239 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4240 error ("-mupper-regs-di requires -mvsx");
4241 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4244 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4246 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4247 error ("-mupper-regs-sf requires -mpower8-vector");
4248 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4251 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4252 silently turn off quad memory mode. */
4253 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4255 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4256 warning (0, N_("-mquad-memory requires 64-bit mode"));
4258 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4259 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4261 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4262 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4265 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4266 the words are reversed, but atomic operations can still be done by
4267 swapping the words. */
4268 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4270 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4271 warning (0, N_("-mquad-memory is not available in little endian mode"));
4273 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4276 /* Assume if the user asked for normal quad memory instructions, they want
4277 the atomic versions as well, unless they explicitly told us not to use quad
4278 word atomic instructions. */
4279 if (TARGET_QUAD_MEMORY
4280 && !TARGET_QUAD_MEMORY_ATOMIC
4281 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4282 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
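/* For reference (illustrative): -mquad-memory covers the lq/stq 16-byte
   load/store instructions, while -mquad-memory-atomic covers the
   lqarx/stqcx. pair used for 128-bit atomic sequences.  */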
4284 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4285 generating power8 instructions. */
4286 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4287 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4288 & OPTION_MASK_P8_FUSION);
4290 /* Setting additional fusion flags turns on base fusion. */
4291 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4293 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4295 if (TARGET_P8_FUSION_SIGN)
4296 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4298 if (TARGET_TOC_FUSION)
4299 error ("-mtoc-fusion requires -mpower8-fusion");
4301 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4303 else
4304 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4307 /* Power9 fusion is a superset of power8 fusion. */
4308 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4310 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4312 /* We prefer to not mention undocumented options in
4313 error messages. However, if users have managed to select
4314 power9-fusion without selecting power8-fusion, they
4315 already know about undocumented flags. */
4316 error ("-mpower9-fusion requires -mpower8-fusion");
4317 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4319 else
4320 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4323 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4324 generating power9 instructions. */
4325 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4326 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4327 & OPTION_MASK_P9_FUSION);
4329 /* Power8 does not fuse sign extended loads with the addis. If we are
4330 optimizing at high levels for speed, convert a sign extended load into a
4331 zero extending load plus an explicit sign extension. */
4332 if (TARGET_P8_FUSION
4333 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4334 && optimize_function_for_speed_p (cfun)
4335 && optimize >= 3)
4336 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
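/* Illustrative assembly for the conversion above: rather than the
   unfusable sign-extending pair

	addis 9,2,sym@toc@ha
	lha 10,sym@toc@l(9)

   emit a fusable zero-extending load plus an explicit sign extend:

	addis 9,2,sym@toc@ha
	lhz 10,sym@toc@l(9)
	extsh 10,10
 */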
4338 /* TOC fusion requires 64-bit and medium/large code model. */
4339 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4341 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4342 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4343 warning (0, N_("-mtoc-fusion requires 64-bit"));
4346 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4348 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4349 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4350 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4353 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4354 model. */
4355 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4356 && (TARGET_CMODEL != CMODEL_SMALL)
4357 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4358 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4360 /* ISA 3.0 vector instructions include ISA 2.07. */
4361 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4363 /* We prefer to not mention undocumented options in
4364 error messages. However, if users have managed to select
4365 power9-vector without selecting power8-vector, they
4366 already know about undocumented flags. */
4367 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4368 error ("-mpower9-vector requires -mpower8-vector");
4369 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4372 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4373 -mpower9-dform-vector. */
4374 if (TARGET_P9_DFORM_BOTH > 0)
4376 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4377 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4379 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4380 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4382 else if (TARGET_P9_DFORM_BOTH == 0)
4384 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4385 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4387 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4388 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4391 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4392 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4394 /* We prefer to not mention undocumented options in
4395 error messages. However, if users have managed to select
4396 power9-dform without selecting power9-vector, they
4397 already know about undocumented flags. */
4398 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4399 error ("-mpower9-dform requires -mpower9-vector");
4400 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4401 | OPTION_MASK_P9_DFORM_VECTOR);
4404 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4406 /* We prefer to not mention undocumented options in
4407 error messages. However, if users have managed to select
4408 power9-dform without selecting upper-regs-df, they
4409 already know about undocumented flags. */
4410 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4411 error ("-mpower9-dform requires -mupper-regs-df");
4412 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4415 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4417 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4418 error ("-mpower9-dform requires -mupper-regs-sf");
4419 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4422 /* Enable LRA by default. */
4423 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4424 rs6000_isa_flags |= OPTION_MASK_LRA;
4426 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4427 but do show up with -mno-lra. Given that -mlra will become the default
4428 once PR 69847 is fixed, turn off the options with problems by default if
4429 -mno-lra was used, and warn if the user explicitly asked for the option.
4431 Enable -mpower9-dform-vector by default if LRA and the other power9 options
4432 are enabled. Enable -mvsx-timode by default if LRA and VSX are enabled. */
4433 if (!TARGET_LRA)
4435 if (TARGET_VSX_TIMODE)
4437 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4438 warning (0, "-mvsx-timode might need -mlra");
4440 else
4441 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4445 else
4447 if (TARGET_VSX && !TARGET_VSX_TIMODE
4448 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4449 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4452 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4453 support. If we only have ISA 2.06 support, and the user did not specify
4454 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4455 but we don't enable the full vectorization support. */
4456 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4457 TARGET_ALLOW_MOVMISALIGN = 1;
4459 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4461 if (TARGET_ALLOW_MOVMISALIGN > 0
4462 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4463 error ("-mallow-movmisalign requires -mvsx");
4465 TARGET_ALLOW_MOVMISALIGN = 0;
4468 /* Determine when unaligned vector accesses are permitted, and when
4469 they are preferred over masked Altivec loads. Note that if
4470 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4471 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4472 not true. */
4473 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4475 if (!TARGET_VSX)
4477 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4478 error ("-mefficient-unaligned-vsx requires -mvsx");
4480 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4483 else if (!TARGET_ALLOW_MOVMISALIGN)
4485 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4486 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4488 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4492 /* Check whether we should allow small integers into VSX registers. We
4493 require direct move to prevent the register allocator from having to move
4494 variables through memory to do moves. SImode can be used on ISA 2.07,
4495 while HImode and QImode require ISA 3.0. */
4496 if (TARGET_VSX_SMALL_INTEGER
4497 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4499 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4500 error ("-mvsx-small-integer requires -mpower8-vector, "
4501 "-mupper-regs-di, and -mdirect-move");
4503 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4506 /* Set long double size before the IEEE 128-bit tests. */
4507 if (!global_options_set.x_rs6000_long_double_type_size)
4509 if (main_target_opt != NULL
4510 && (main_target_opt->x_rs6000_long_double_type_size
4511 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4512 error ("target attribute or pragma changes long double size");
4513 else
4514 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4517 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4518 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4519 pick up this default. */
4520 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4521 if (!global_options_set.x_rs6000_ieeequad)
4522 rs6000_ieeequad = 1;
4523 #endif
4525 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4526 systems, but don't enable the __float128 keyword. */
4527 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4528 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4529 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4530 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4532 /* IEEE 128-bit floating point requires VSX support. */
4533 if (!TARGET_VSX)
4535 if (TARGET_FLOAT128_KEYWORD)
4537 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4538 error ("-mfloat128 requires VSX support");
4540 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4541 | OPTION_MASK_FLOAT128_KEYWORD
4542 | OPTION_MASK_FLOAT128_HW);
4545 else if (TARGET_FLOAT128_TYPE)
4547 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4548 error ("-mfloat128-type requires VSX support");
4550 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4551 | OPTION_MASK_FLOAT128_KEYWORD
4552 | OPTION_MASK_FLOAT128_HW);
4556 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4557 128-bit floating point support to be enabled. */
4558 if (!TARGET_FLOAT128_TYPE)
4560 if (TARGET_FLOAT128_KEYWORD)
4562 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4564 error ("-mfloat128 requires -mfloat128-type");
4565 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4566 | OPTION_MASK_FLOAT128_KEYWORD
4567 | OPTION_MASK_FLOAT128_HW);
4569 else
4570 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4573 if (TARGET_FLOAT128_HW)
4575 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4577 error ("-mfloat128-hardware requires -mfloat128-type");
4578 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4580 else
4581 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4582 | OPTION_MASK_FLOAT128_KEYWORD
4583 | OPTION_MASK_FLOAT128_HW);
4587 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4588 -mfloat128-hardware by default. However, don't enable the __float128
4589 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4590 -mfloat128 option as well if it was not already set. */
4591 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4592 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4593 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4594 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4596 if (TARGET_FLOAT128_HW
4597 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4599 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4600 error ("-mfloat128-hardware requires full ISA 3.0 support");
4602 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4605 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4606 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4607 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4608 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4610 /* Print the options after updating the defaults. */
4611 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4612 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4614 /* E500mc does "better" if we inline more aggressively. Respect the
4615 user's opinion, though. */
4616 if (rs6000_block_move_inline_limit == 0
4617 && (rs6000_cpu == PROCESSOR_PPCE500MC
4618 || rs6000_cpu == PROCESSOR_PPCE500MC64
4619 || rs6000_cpu == PROCESSOR_PPCE5500
4620 || rs6000_cpu == PROCESSOR_PPCE6500))
4621 rs6000_block_move_inline_limit = 128;
4623 /* store_one_arg depends on expand_block_move to handle at least the
4624 size of reg_parm_stack_space. */
4625 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4626 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4628 if (global_init_p)
4630 /* If the appropriate debug option is enabled, replace the target hooks
4631 with debug versions that call the real version and then print
4632 debugging information. */
4633 if (TARGET_DEBUG_COST)
4635 targetm.rtx_costs = rs6000_debug_rtx_costs;
4636 targetm.address_cost = rs6000_debug_address_cost;
4637 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4640 if (TARGET_DEBUG_ADDR)
4642 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4643 targetm.legitimize_address = rs6000_debug_legitimize_address;
4644 rs6000_secondary_reload_class_ptr
4645 = rs6000_debug_secondary_reload_class;
4646 rs6000_secondary_memory_needed_ptr
4647 = rs6000_debug_secondary_memory_needed;
4648 rs6000_cannot_change_mode_class_ptr
4649 = rs6000_debug_cannot_change_mode_class;
4650 rs6000_preferred_reload_class_ptr
4651 = rs6000_debug_preferred_reload_class;
4652 rs6000_legitimize_reload_address_ptr
4653 = rs6000_debug_legitimize_reload_address;
4654 rs6000_mode_dependent_address_ptr
4655 = rs6000_debug_mode_dependent_address;
4658 if (rs6000_veclibabi_name)
4660 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4661 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4662 else
4664 error ("unknown vectorization library ABI type (%s) for "
4665 "-mveclibabi= switch", rs6000_veclibabi_name);
4666 ret = false;
4671 /* Disable VSX and AltiVec silently if the user switched cpus to power7 in a
4672 target attribute or pragma (which automatically enables both options),
4673 unless the AltiVec ABI was set. This is set by default for 64-bit, but
4674 not for 32-bit. */
4675 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4676 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4677 | OPTION_MASK_FLOAT128_TYPE
4678 | OPTION_MASK_FLOAT128_KEYWORD)
4679 & ~rs6000_isa_flags_explicit);
4681 /* Enable Altivec ABI for AIX -maltivec. */
4682 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4684 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4685 error ("target attribute or pragma changes AltiVec ABI");
4686 else
4687 rs6000_altivec_abi = 1;
4690 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4691 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4692 be explicitly overridden in either case. */
4693 if (TARGET_ELF)
4695 if (!global_options_set.x_rs6000_altivec_abi
4696 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4698 if (main_target_opt != NULL
4699 && !main_target_opt->x_rs6000_altivec_abi)
4700 error ("target attribute or pragma changes AltiVec ABI");
4701 else
4702 rs6000_altivec_abi = 1;
4706 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4707 So far, the only darwin64 targets are also MACH-O. */
4708 if (TARGET_MACHO
4709 && DEFAULT_ABI == ABI_DARWIN
4710 && TARGET_64BIT)
4712 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4713 error ("target attribute or pragma changes darwin64 ABI");
4714 else
4716 rs6000_darwin64_abi = 1;
4717 /* Default to natural alignment, for better performance. */
4718 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4722 /* Place FP constants in the constant pool instead of the TOC
4723 if section anchors are enabled. */
4724 if (flag_section_anchors
4725 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4726 TARGET_NO_FP_IN_TOC = 1;
4728 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4729 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4731 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4732 SUBTARGET_OVERRIDE_OPTIONS;
4733 #endif
4734 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4735 SUBSUBTARGET_OVERRIDE_OPTIONS;
4736 #endif
4737 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4738 SUB3TARGET_OVERRIDE_OPTIONS;
4739 #endif
4741 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4742 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4744 /* For the E500 family of cores, reset the single/double FP flags to let us
4745 check that they remain constant across attributes or pragmas. Also,
4746 clear a possible request for string instructions, which are not supported
4747 and which we might have silently enabled above for -Os.
4749 For other families, clear ISEL in case it was set implicitly.
4752 switch (rs6000_cpu)
4754 case PROCESSOR_PPC8540:
4755 case PROCESSOR_PPC8548:
4756 case PROCESSOR_PPCE500MC:
4757 case PROCESSOR_PPCE500MC64:
4758 case PROCESSOR_PPCE5500:
4759 case PROCESSOR_PPCE6500:
4761 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4762 rs6000_double_float = TARGET_E500_DOUBLE;
4764 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4766 break;
4768 default:
4770 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4771 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4773 break;
4776 if (main_target_opt)
4778 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4779 error ("target attribute or pragma changes single precision floating "
4780 "point");
4781 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4782 error ("target attribute or pragma changes double precision floating "
4783 "point");
4786 /* Detect invalid option combinations with E500. */
4787 CHECK_E500_OPTIONS;
4789 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4790 && rs6000_cpu != PROCESSOR_POWER5
4791 && rs6000_cpu != PROCESSOR_POWER6
4792 && rs6000_cpu != PROCESSOR_POWER7
4793 && rs6000_cpu != PROCESSOR_POWER8
4794 && rs6000_cpu != PROCESSOR_POWER9
4795 && rs6000_cpu != PROCESSOR_PPCA2
4796 && rs6000_cpu != PROCESSOR_CELL
4797 && rs6000_cpu != PROCESSOR_PPC476);
4798 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4799 || rs6000_cpu == PROCESSOR_POWER5
4800 || rs6000_cpu == PROCESSOR_POWER7
4801 || rs6000_cpu == PROCESSOR_POWER8);
4802 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4803 || rs6000_cpu == PROCESSOR_POWER5
4804 || rs6000_cpu == PROCESSOR_POWER6
4805 || rs6000_cpu == PROCESSOR_POWER7
4806 || rs6000_cpu == PROCESSOR_POWER8
4807 || rs6000_cpu == PROCESSOR_POWER9
4808 || rs6000_cpu == PROCESSOR_PPCE500MC
4809 || rs6000_cpu == PROCESSOR_PPCE500MC64
4810 || rs6000_cpu == PROCESSOR_PPCE5500
4811 || rs6000_cpu == PROCESSOR_PPCE6500);
4813 /* Allow debug switches to override the above settings. These are set to -1
4814 in rs6000.opt to indicate the user hasn't directly set the switch. */
4815 if (TARGET_ALWAYS_HINT >= 0)
4816 rs6000_always_hint = TARGET_ALWAYS_HINT;
4818 if (TARGET_SCHED_GROUPS >= 0)
4819 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4821 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4822 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4824 rs6000_sched_restricted_insns_priority
4825 = (rs6000_sched_groups ? 1 : 0);
4827 /* Handle -msched-costly-dep option. */
4828 rs6000_sched_costly_dep
4829 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4831 if (rs6000_sched_costly_dep_str)
4833 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4834 rs6000_sched_costly_dep = no_dep_costly;
4835 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4836 rs6000_sched_costly_dep = all_deps_costly;
4837 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4838 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4839 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4840 rs6000_sched_costly_dep = store_to_load_dep_costly;
4841 else
4842 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4843 atoi (rs6000_sched_costly_dep_str));
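/* Usage sketch (assumed): besides the keywords handled above, the option
   accepts a bare integer, e.g. -msched-costly-dep=20, which atoi turns
   into a cost threshold for deciding whether a dependence is costly.  */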
4846 /* Handle -minsert-sched-nops option. */
4847 rs6000_sched_insert_nops
4848 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4850 if (rs6000_sched_insert_nops_str)
4852 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4853 rs6000_sched_insert_nops = sched_finish_none;
4854 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4855 rs6000_sched_insert_nops = sched_finish_pad_groups;
4856 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4857 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4858 else
4859 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4860 atoi (rs6000_sched_insert_nops_str));
4863 if (global_init_p)
4865 #ifdef TARGET_REGNAMES
4866 /* If the user desires alternate register names, copy in the
4867 alternate names now. */
4868 if (TARGET_REGNAMES)
4869 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4870 #endif
4872 /* Set aix_struct_return last, after the ABI is determined.
4873 If -maix-struct-return or -msvr4-struct-return was explicitly
4874 used, don't override with the ABI default. */
4875 if (!global_options_set.x_aix_struct_return)
4876 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4878 #if 0
4879 /* IBM XL compiler defaults to unsigned bitfields. */
4880 if (TARGET_XL_COMPAT)
4881 flag_signed_bitfields = 0;
4882 #endif
4884 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4885 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4887 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4889 /* We can only guarantee the availability of DI pseudo-ops when
4890 assembling for 64-bit targets. */
4891 if (!TARGET_64BIT)
4893 targetm.asm_out.aligned_op.di = NULL;
4894 targetm.asm_out.unaligned_op.di = NULL;
4898 /* Set branch target alignment, if not optimizing for size. */
4899 if (!optimize_size)
4901 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4902 aligned 8-byte to avoid misprediction by the branch predictor. */
4903 if (rs6000_cpu == PROCESSOR_TITAN
4904 || rs6000_cpu == PROCESSOR_CELL)
4906 if (align_functions <= 0)
4907 align_functions = 8;
4908 if (align_jumps <= 0)
4909 align_jumps = 8;
4910 if (align_loops <= 0)
4911 align_loops = 8;
4913 if (rs6000_align_branch_targets)
4915 if (align_functions <= 0)
4916 align_functions = 16;
4917 if (align_jumps <= 0)
4918 align_jumps = 16;
4919 if (align_loops <= 0)
4921 can_override_loop_align = 1;
4922 align_loops = 16;
4925 if (align_jumps_max_skip <= 0)
4926 align_jumps_max_skip = 15;
4927 if (align_loops_max_skip <= 0)
4928 align_loops_max_skip = 15;
4931 /* Arrange to save and restore machine status around nested functions. */
4932 init_machine_status = rs6000_init_machine_status;
4934 /* We should always be splitting complex arguments, but we can't break
4935 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4936 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4937 targetm.calls.split_complex_arg = NULL;
4939 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4940 if (DEFAULT_ABI == ABI_AIX)
4941 targetm.calls.custom_function_descriptors = 0;
4944 /* Initialize rs6000_cost with the appropriate target costs. */
4945 if (optimize_size)
4946 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4947 else
4948 switch (rs6000_cpu)
4950 case PROCESSOR_RS64A:
4951 rs6000_cost = &rs64a_cost;
4952 break;
4954 case PROCESSOR_MPCCORE:
4955 rs6000_cost = &mpccore_cost;
4956 break;
4958 case PROCESSOR_PPC403:
4959 rs6000_cost = &ppc403_cost;
4960 break;
4962 case PROCESSOR_PPC405:
4963 rs6000_cost = &ppc405_cost;
4964 break;
4966 case PROCESSOR_PPC440:
4967 rs6000_cost = &ppc440_cost;
4968 break;
4970 case PROCESSOR_PPC476:
4971 rs6000_cost = &ppc476_cost;
4972 break;
4974 case PROCESSOR_PPC601:
4975 rs6000_cost = &ppc601_cost;
4976 break;
4978 case PROCESSOR_PPC603:
4979 rs6000_cost = &ppc603_cost;
4980 break;
4982 case PROCESSOR_PPC604:
4983 rs6000_cost = &ppc604_cost;
4984 break;
4986 case PROCESSOR_PPC604e:
4987 rs6000_cost = &ppc604e_cost;
4988 break;
4990 case PROCESSOR_PPC620:
4991 rs6000_cost = &ppc620_cost;
4992 break;
4994 case PROCESSOR_PPC630:
4995 rs6000_cost = &ppc630_cost;
4996 break;
4998 case PROCESSOR_CELL:
4999 rs6000_cost = &ppccell_cost;
5000 break;
5002 case PROCESSOR_PPC750:
5003 case PROCESSOR_PPC7400:
5004 rs6000_cost = &ppc750_cost;
5005 break;
5007 case PROCESSOR_PPC7450:
5008 rs6000_cost = &ppc7450_cost;
5009 break;
5011 case PROCESSOR_PPC8540:
5012 case PROCESSOR_PPC8548:
5013 rs6000_cost = &ppc8540_cost;
5014 break;
5016 case PROCESSOR_PPCE300C2:
5017 case PROCESSOR_PPCE300C3:
5018 rs6000_cost = &ppce300c2c3_cost;
5019 break;
5021 case PROCESSOR_PPCE500MC:
5022 rs6000_cost = &ppce500mc_cost;
5023 break;
5025 case PROCESSOR_PPCE500MC64:
5026 rs6000_cost = &ppce500mc64_cost;
5027 break;
5029 case PROCESSOR_PPCE5500:
5030 rs6000_cost = &ppce5500_cost;
5031 break;
5033 case PROCESSOR_PPCE6500:
5034 rs6000_cost = &ppce6500_cost;
5035 break;
5037 case PROCESSOR_TITAN:
5038 rs6000_cost = &titan_cost;
5039 break;
5041 case PROCESSOR_POWER4:
5042 case PROCESSOR_POWER5:
5043 rs6000_cost = &power4_cost;
5044 break;
5046 case PROCESSOR_POWER6:
5047 rs6000_cost = &power6_cost;
5048 break;
5050 case PROCESSOR_POWER7:
5051 rs6000_cost = &power7_cost;
5052 break;
5054 case PROCESSOR_POWER8:
5055 rs6000_cost = &power8_cost;
5056 break;
5058 case PROCESSOR_POWER9:
5059 rs6000_cost = &power9_cost;
5060 break;
5062 case PROCESSOR_PPCA2:
5063 rs6000_cost = &ppca2_cost;
5064 break;
5066 default:
5067 gcc_unreachable ();
5070 if (global_init_p)
5072 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5073 rs6000_cost->simultaneous_prefetches,
5074 global_options.x_param_values,
5075 global_options_set.x_param_values);
5076 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5077 global_options.x_param_values,
5078 global_options_set.x_param_values);
5079 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5080 rs6000_cost->cache_line_size,
5081 global_options.x_param_values,
5082 global_options_set.x_param_values);
5083 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5084 global_options.x_param_values,
5085 global_options_set.x_param_values);
5087 /* Increase loop peeling limits based on performance analysis. */
5088 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5089 global_options.x_param_values,
5090 global_options_set.x_param_values);
5091 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5092 global_options.x_param_values,
5093 global_options_set.x_param_values);
5095 /* If using typedef char *va_list, signal that
5096 __builtin_va_start (&ap, 0) can be optimized to
5097 ap = __builtin_next_arg (0). */
5098 if (DEFAULT_ABI != ABI_V4)
5099 targetm.expand_builtin_va_start = NULL;
5102 /* Set up single/double float flags.
5103 If TARGET_HARD_FLOAT is set but neither single nor double is set,
5104 then set both flags. */
5105 if (TARGET_HARD_FLOAT && TARGET_FPRS
5106 && rs6000_single_float == 0 && rs6000_double_float == 0)
5107 rs6000_single_float = rs6000_double_float = 1;
5109 /* If not explicitly specified via option, decide whether to generate indexed
5110 load/store instructions. */
5111 if (TARGET_AVOID_XFORM == -1)
5112 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5113 DERAT mispredict penalty. However, the LVE and STVE altivec instructions
5114 need indexed accesses and the type used is the scalar type of the element
5115 being loaded or stored. */
5116 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5117 && !TARGET_ALTIVEC);
5119 /* Set the -mrecip options. */
5120 if (rs6000_recip_name)
5122 char *p = ASTRDUP (rs6000_recip_name);
5123 char *q;
5124 unsigned int mask, i;
5125 bool invert;
5127 while ((q = strtok (p, ",")) != NULL)
5129 p = NULL;
5130 if (*q == '!')
5132 invert = true;
5133 q++;
5135 else
5136 invert = false;
5138 if (!strcmp (q, "default"))
5139 mask = ((TARGET_RECIP_PRECISION)
5140 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5141 else
5143 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5144 if (!strcmp (q, recip_options[i].string))
5146 mask = recip_options[i].mask;
5147 break;
5150 if (i == ARRAY_SIZE (recip_options))
5152 error ("unknown option for -mrecip=%s", q);
5153 invert = false;
5154 mask = 0;
5155 ret = false;
5159 if (invert)
5160 rs6000_recip_control &= ~mask;
5161 else
5162 rs6000_recip_control |= mask;
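/* So, for example, -mrecip=default turns on the default estimate mask,
   while a string like -mrecip=!div (keyword names are those listed in
   the recip_options[] table defined elsewhere in this file) would mask
   the corresponding bits back off.  */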
5166 /* Set the builtin mask of the various options used that could affect which
5167 builtins are enabled. In the past we used target_flags, but we've run out
5168 of bits, and some options like SPE and PAIRED are no longer in
5169 target_flags. */
5170 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5171 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5172 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5173 rs6000_builtin_mask);
5175 /* Initialize all of the registers. */
5176 rs6000_init_hard_regno_mode_ok (global_init_p);
5178 /* Save the initial options in case the user uses function-specific options. */
5179 if (global_init_p)
5180 target_option_default_node = target_option_current_node
5181 = build_target_option_node (&global_options);
5183 /* If not explicitly specified via option, decide whether to generate the
5184 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
5185 if (TARGET_LINK_STACK == -1)
5186 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5188 return ret;
5191 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5192 define the target cpu type. */
5194 static void
5195 rs6000_option_override (void)
5197 (void) rs6000_option_override_internal (true);
5199 /* Register machine-specific passes. This needs to be done at start-up.
5200 It's convenient to do it here (like i386 does). */
5201 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5203 struct register_pass_info analyze_swaps_info
5204 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5206 register_pass (&analyze_swaps_info);
5210 /* Implement targetm.vectorize.builtin_mask_for_load. */
5211 static tree
5212 rs6000_builtin_mask_for_load (void)
5214 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5215 if ((TARGET_ALTIVEC && !TARGET_VSX)
5216 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5217 return altivec_builtin_mask_for_load;
5218 else
5219 return 0;
5222 /* Implement LOOP_ALIGN. */
5224 rs6000_loop_align (rtx label)
5226 basic_block bb;
5227 int ninsns;
5229 /* Don't override loop alignment if -falign-loops was specified. */
5230 if (!can_override_loop_align)
5231 return align_loops_log;
5233 bb = BLOCK_FOR_INSN (label);
5234 ninsns = num_loop_insns (bb->loop_father);
5236 /* Align small loops to 32 bytes (return 5, i.e. log2 of 32) to fit in an icache sector; otherwise return the default. */
5237 if (ninsns > 4 && ninsns <= 8
5238 && (rs6000_cpu == PROCESSOR_POWER4
5239 || rs6000_cpu == PROCESSOR_POWER5
5240 || rs6000_cpu == PROCESSOR_POWER6
5241 || rs6000_cpu == PROCESSOR_POWER7
5242 || rs6000_cpu == PROCESSOR_POWER8
5243 || rs6000_cpu == PROCESSOR_POWER9))
5244 return 5;
5245 else
5246 return align_loops_log;
5249 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5250 static int
5251 rs6000_loop_align_max_skip (rtx_insn *label)
5253 return (1 << rs6000_loop_align (label)) - 1;
5256 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5257 after applying N iterations. This routine does not determine
5258 how many iterations are required to reach the desired alignment. */
5260 static bool
5261 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5263 if (is_packed)
5264 return false;
5266 if (TARGET_32BIT)
5268 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5269 return true;
5271 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5272 return true;
5274 return false;
5276 else
5278 if (TARGET_MACHO)
5279 return false;
5281 /* Assume that all other types are naturally aligned. CHECKME! */
5282 return true;
5286 /* Return true if the vector misalignment factor is supported by the
5287 target. */
5288 static bool
5289 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5290 const_tree type,
5291 int misalignment,
5292 bool is_packed)
5294 if (TARGET_VSX)
5296 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5297 return true;
5299 /* Return false if the movmisalign pattern is not supported for this mode. */
5300 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5301 return false;
5303 if (misalignment == -1)
5305 /* Misalignment factor is unknown at compile time but we know
5306 it's word aligned. */
5307 if (rs6000_vector_alignment_reachable (type, is_packed))
5309 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5311 if (element_size == 64 || element_size == 32)
5312 return true;
5315 return false;
5318 /* VSX supports word-aligned vectors. */
5319 if (misalignment % 4 == 0)
5320 return true;
5322 return false;
5325 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5326 static int
5327 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5328 tree vectype, int misalign)
5330 unsigned elements;
5331 tree elem_type;
5333 switch (type_of_cost)
5335 case scalar_stmt:
5336 case scalar_load:
5337 case scalar_store:
5338 case vector_stmt:
5339 case vector_load:
5340 case vector_store:
5341 case vec_to_scalar:
5342 case scalar_to_vec:
5343 case cond_branch_not_taken:
5344 return 1;
5346 case vec_perm:
5347 if (TARGET_VSX)
5348 return 3;
5349 else
5350 return 1;
5352 case vec_promote_demote:
5353 if (TARGET_VSX)
5354 return 4;
5355 else
5356 return 1;
5358 case cond_branch_taken:
5359 return 3;
5361 case unaligned_load:
5362 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5363 return 1;
5365 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5367 elements = TYPE_VECTOR_SUBPARTS (vectype);
5368 if (elements == 2)
5369 /* Double word aligned. */
5370 return 2;
5372 if (elements == 4)
5374 switch (misalign)
5376 case 8:
5377 /* Double word aligned. */
5378 return 2;
5380 case -1:
5381 /* Unknown misalignment. */
5382 case 4:
5383 case 12:
5384 /* Word aligned. */
5385 return 22;
5387 default:
5388 gcc_unreachable ();
5393 if (TARGET_ALTIVEC)
5394 /* Misaligned loads are not supported. */
5395 gcc_unreachable ();
5397 return 2;
5399 case unaligned_store:
5400 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5401 return 1;
5403 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5405 elements = TYPE_VECTOR_SUBPARTS (vectype);
5406 if (elements == 2)
5407 /* Double word aligned. */
5408 return 2;
5410 if (elements == 4)
5412 switch (misalign)
5414 case 8:
5415 /* Double word aligned. */
5416 return 2;
5418 case -1:
5419 /* Unknown misalignment. */
5420 case 4:
5421 case 12:
5422 /* Word aligned. */
5423 return 23;
5425 default:
5426 gcc_unreachable ();
5431 if (TARGET_ALTIVEC)
5432 /* Misaligned stores are not supported. */
5433 gcc_unreachable ();
5435 return 2;
5437 case vec_construct:
5438 /* This is a rough approximation assuming non-constant elements
5439 constructed into a vector via element insertion. FIXME:
5440 vec_construct is not granular enough for uniformly good
5441 decisions. If the initialization is a splat, this is
5442 cheaper than we estimate. Improve this someday. */
5443 elem_type = TREE_TYPE (vectype);
5444 /* 32-bit vectors loaded into registers are stored as double
5445 precision, so we need 2 permutes, 2 converts, and 1 merge
5446 to construct a vector of short floats from them. */
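/* Under that estimate a V4SFmode construction costs 5, while the generic
   path below gives max (2, nunits - 1): 3 for V4SImode and 15 for
   V16QImode.  */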
5447 if (SCALAR_FLOAT_TYPE_P (elem_type)
5448 && TYPE_PRECISION (elem_type) == 32)
5449 return 5;
5450 else
5451 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5453 default:
5454 gcc_unreachable ();
5458 /* Implement targetm.vectorize.preferred_simd_mode. */
5460 static machine_mode
5461 rs6000_preferred_simd_mode (machine_mode mode)
5463 if (TARGET_VSX)
5464 switch (mode)
5466 case DFmode:
5467 return V2DFmode;
5468 default:;
5470 if (TARGET_ALTIVEC || TARGET_VSX)
5471 switch (mode)
5473 case SFmode:
5474 return V4SFmode;
5475 case TImode:
5476 return V1TImode;
5477 case DImode:
5478 return V2DImode;
5479 case SImode:
5480 return V4SImode;
5481 case HImode:
5482 return V8HImode;
5483 case QImode:
5484 return V16QImode;
5485 default:;
5487 if (TARGET_SPE)
5488 switch (mode)
5490 case SFmode:
5491 return V2SFmode;
5492 case SImode:
5493 return V2SImode;
5494 default:;
5496 if (TARGET_PAIRED_FLOAT
5497 && mode == SFmode)
5498 return V2SFmode;
5499 return word_mode;
5502 typedef struct _rs6000_cost_data
5504 struct loop *loop_info;
5505 unsigned cost[3];
5506 } rs6000_cost_data;
5508 /* Test for likely overcommitment of vector hardware resources. If a
5509 loop iteration is relatively large, and too large a percentage of
5510 instructions in the loop are vectorized, the cost model may not
5511 adequately reflect delays from unavailable vector resources.
5512 Penalize the loop body cost for this case. */
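/* A worked example of the thresholds below: with vec_cost = 90 and
   not_vec_cost = 10, the density is 90% (above 85) and the size is 100
   (above 70), so the body cost is scaled to 90 * 110 / 100 = 99.  */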
5514 static void
5515 rs6000_density_test (rs6000_cost_data *data)
5517 const int DENSITY_PCT_THRESHOLD = 85;
5518 const int DENSITY_SIZE_THRESHOLD = 70;
5519 const int DENSITY_PENALTY = 10;
5520 struct loop *loop = data->loop_info;
5521 basic_block *bbs = get_loop_body (loop);
5522 int nbbs = loop->num_nodes;
5523 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5524 int i, density_pct;
5526 for (i = 0; i < nbbs; i++)
5528 basic_block bb = bbs[i];
5529 gimple_stmt_iterator gsi;
5531 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5533 gimple *stmt = gsi_stmt (gsi);
5534 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5536 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5537 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5538 not_vec_cost++;
5542 free (bbs);
5543 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5545 if (density_pct > DENSITY_PCT_THRESHOLD
5546 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5548 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5549 if (dump_enabled_p ())
5550 dump_printf_loc (MSG_NOTE, vect_location,
5551 "density %d%%, cost %d exceeds threshold, penalizing "
5552 "loop body cost by %d%%", density_pct,
5553 vec_cost + not_vec_cost, DENSITY_PENALTY);
5557 /* Implement targetm.vectorize.init_cost. */
5559 static void *
5560 rs6000_init_cost (struct loop *loop_info)
5562 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5563 data->loop_info = loop_info;
5564 data->cost[vect_prologue] = 0;
5565 data->cost[vect_body] = 0;
5566 data->cost[vect_epilogue] = 0;
5567 return data;
5570 /* Implement targetm.vectorize.add_stmt_cost. */
5572 static unsigned
5573 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5574 struct _stmt_vec_info *stmt_info, int misalign,
5575 enum vect_cost_model_location where)
5577 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5578 unsigned retval = 0;
5580 if (flag_vect_cost_model)
5582 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5583 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5584 misalign);
5585 /* Statements in an inner loop relative to the loop being
5586 vectorized are weighted more heavily. The value here is
5587 arbitrary and could potentially be improved with analysis. */
5588 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5589 count *= 50; /* FIXME. */
5591 retval = (unsigned) (count * stmt_cost);
5592 cost_data->cost[where] += retval;
5595 return retval;
5598 /* Implement targetm.vectorize.finish_cost. */
5600 static void
5601 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5602 unsigned *body_cost, unsigned *epilogue_cost)
5604 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5606 if (cost_data->loop_info)
5607 rs6000_density_test (cost_data);
5609 *prologue_cost = cost_data->cost[vect_prologue];
5610 *body_cost = cost_data->cost[vect_body];
5611 *epilogue_cost = cost_data->cost[vect_epilogue];
5614 /* Implement targetm.vectorize.destroy_cost_data. */
5616 static void
5617 rs6000_destroy_cost_data (void *data)
5619 free (data);
5622 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5623 library with vectorized intrinsics. */
5625 static tree
5626 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5627 tree type_in)
5629 char name[32];
5630 const char *suffix = NULL;
5631 tree fntype, new_fndecl, bdecl = NULL_TREE;
5632 int n_args = 1;
5633 const char *bname;
5634 machine_mode el_mode, in_mode;
5635 int n, in_n;
5637 /* Libmass is suitable only for unsafe math, as it does not correctly
5638 support parts of IEEE (such as denormals) with the required precision.
5639 Only support it if we have VSX, so we can use the simd d2 or f4 functions.
5640 XXX: Add variable length support. */
5641 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5642 return NULL_TREE;
5644 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5645 n = TYPE_VECTOR_SUBPARTS (type_out);
5646 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5647 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5648 if (el_mode != in_mode
5649 || n != in_n)
5650 return NULL_TREE;
5652 switch (fn)
5654 CASE_CFN_ATAN2:
5655 CASE_CFN_HYPOT:
5656 CASE_CFN_POW:
5657 n_args = 2;
5658 gcc_fallthrough ();
5660 CASE_CFN_ACOS:
5661 CASE_CFN_ACOSH:
5662 CASE_CFN_ASIN:
5663 CASE_CFN_ASINH:
5664 CASE_CFN_ATAN:
5665 CASE_CFN_ATANH:
5666 CASE_CFN_CBRT:
5667 CASE_CFN_COS:
5668 CASE_CFN_COSH:
5669 CASE_CFN_ERF:
5670 CASE_CFN_ERFC:
5671 CASE_CFN_EXP2:
5672 CASE_CFN_EXP:
5673 CASE_CFN_EXPM1:
5674 CASE_CFN_LGAMMA:
5675 CASE_CFN_LOG10:
5676 CASE_CFN_LOG1P:
5677 CASE_CFN_LOG2:
5678 CASE_CFN_LOG:
5679 CASE_CFN_SIN:
5680 CASE_CFN_SINH:
5681 CASE_CFN_SQRT:
5682 CASE_CFN_TAN:
5683 CASE_CFN_TANH:
5684 if (el_mode == DFmode && n == 2)
5686 bdecl = mathfn_built_in (double_type_node, fn);
5687 suffix = "d2"; /* pow -> powd2 */
5689 else if (el_mode == SFmode && n == 4)
5691 bdecl = mathfn_built_in (float_type_node, fn);
5692 suffix = "4"; /* powf -> powf4 */
5694 else
5695 return NULL_TREE;
5696 if (!bdecl)
5697 return NULL_TREE;
5698 break;
5700 default:
5701 return NULL_TREE;
5704 gcc_assert (suffix != NULL);
5705 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5706 if (!bname)
5707 return NULL_TREE;
5709 strcpy (name, bname + sizeof ("__builtin_") - 1);
5710 strcat (name, suffix);
5712 if (n_args == 1)
5713 fntype = build_function_type_list (type_out, type_in, NULL);
5714 else if (n_args == 2)
5715 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5716 else
5717 gcc_unreachable ();
5719 /* Build a function declaration for the vectorized function. */
5720 new_fndecl = build_decl (BUILTINS_LOCATION,
5721 FUNCTION_DECL, get_identifier (name), fntype);
5722 TREE_PUBLIC (new_fndecl) = 1;
5723 DECL_EXTERNAL (new_fndecl) = 1;
5724 DECL_IS_NOVOPS (new_fndecl) = 1;
5725 TREE_READONLY (new_fndecl) = 1;
5727 return new_fndecl;
5730 /* Returns a function decl for a vectorized version of the builtin function
5731 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5732 if it is not available. */
5734 static tree
5735 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5736 tree type_in)
5738 machine_mode in_mode, out_mode;
5739 int in_n, out_n;
5741 if (TARGET_DEBUG_BUILTIN)
5742 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5743 combined_fn_name (combined_fn (fn)),
5744 GET_MODE_NAME (TYPE_MODE (type_out)),
5745 GET_MODE_NAME (TYPE_MODE (type_in)));
5747 if (TREE_CODE (type_out) != VECTOR_TYPE
5748 || TREE_CODE (type_in) != VECTOR_TYPE
5749 || !TARGET_VECTORIZE_BUILTINS)
5750 return NULL_TREE;
5752 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5753 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5754 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5755 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5757 switch (fn)
5759 CASE_CFN_COPYSIGN:
5760 if (VECTOR_UNIT_VSX_P (V2DFmode)
5761 && out_mode == DFmode && out_n == 2
5762 && in_mode == DFmode && in_n == 2)
5763 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5764 if (VECTOR_UNIT_VSX_P (V4SFmode)
5765 && out_mode == SFmode && out_n == 4
5766 && in_mode == SFmode && in_n == 4)
5767 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5768 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5769 && out_mode == SFmode && out_n == 4
5770 && in_mode == SFmode && in_n == 4)
5771 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5772 break;
5773 CASE_CFN_CEIL:
5774 if (VECTOR_UNIT_VSX_P (V2DFmode)
5775 && out_mode == DFmode && out_n == 2
5776 && in_mode == DFmode && in_n == 2)
5777 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5778 if (VECTOR_UNIT_VSX_P (V4SFmode)
5779 && out_mode == SFmode && out_n == 4
5780 && in_mode == SFmode && in_n == 4)
5781 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5782 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5783 && out_mode == SFmode && out_n == 4
5784 && in_mode == SFmode && in_n == 4)
5785 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5786 break;
5787 CASE_CFN_FLOOR:
5788 if (VECTOR_UNIT_VSX_P (V2DFmode)
5789 && out_mode == DFmode && out_n == 2
5790 && in_mode == DFmode && in_n == 2)
5791 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5792 if (VECTOR_UNIT_VSX_P (V4SFmode)
5793 && out_mode == SFmode && out_n == 4
5794 && in_mode == SFmode && in_n == 4)
5795 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5796 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5797 && out_mode == SFmode && out_n == 4
5798 && in_mode == SFmode && in_n == 4)
5799 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5800 break;
5801 CASE_CFN_FMA:
5802 if (VECTOR_UNIT_VSX_P (V2DFmode)
5803 && out_mode == DFmode && out_n == 2
5804 && in_mode == DFmode && in_n == 2)
5805 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5806 if (VECTOR_UNIT_VSX_P (V4SFmode)
5807 && out_mode == SFmode && out_n == 4
5808 && in_mode == SFmode && in_n == 4)
5809 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5810 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5811 && out_mode == SFmode && out_n == 4
5812 && in_mode == SFmode && in_n == 4)
5813 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5814 break;
5815 CASE_CFN_TRUNC:
5816 if (VECTOR_UNIT_VSX_P (V2DFmode)
5817 && out_mode == DFmode && out_n == 2
5818 && in_mode == DFmode && in_n == 2)
5819 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5820 if (VECTOR_UNIT_VSX_P (V4SFmode)
5821 && out_mode == SFmode && out_n == 4
5822 && in_mode == SFmode && in_n == 4)
5823 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5824 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5825 && out_mode == SFmode && out_n == 4
5826 && in_mode == SFmode && in_n == 4)
5827 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5828 break;
5829 CASE_CFN_NEARBYINT:
5830 if (VECTOR_UNIT_VSX_P (V2DFmode)
5831 && flag_unsafe_math_optimizations
5832 && out_mode == DFmode && out_n == 2
5833 && in_mode == DFmode && in_n == 2)
5834 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5835 if (VECTOR_UNIT_VSX_P (V4SFmode)
5836 && flag_unsafe_math_optimizations
5837 && out_mode == SFmode && out_n == 4
5838 && in_mode == SFmode && in_n == 4)
5839 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5840 break;
5841 CASE_CFN_RINT:
5842 if (VECTOR_UNIT_VSX_P (V2DFmode)
5843 && !flag_trapping_math
5844 && out_mode == DFmode && out_n == 2
5845 && in_mode == DFmode && in_n == 2)
5846 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5847 if (VECTOR_UNIT_VSX_P (V4SFmode)
5848 && !flag_trapping_math
5849 && out_mode == SFmode && out_n == 4
5850 && in_mode == SFmode && in_n == 4)
5851 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5852 break;
5853 default:
5854 break;
5857 /* Generate calls to libmass if appropriate. */
5858 if (rs6000_veclib_handler)
5859 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5861 return NULL_TREE;
5864 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5866 static tree
5867 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5868 tree type_in)
5870 machine_mode in_mode, out_mode;
5871 int in_n, out_n;
5873 if (TARGET_DEBUG_BUILTIN)
5874 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5875 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5876 GET_MODE_NAME (TYPE_MODE (type_out)),
5877 GET_MODE_NAME (TYPE_MODE (type_in)));
5879 if (TREE_CODE (type_out) != VECTOR_TYPE
5880 || TREE_CODE (type_in) != VECTOR_TYPE
5881 || !TARGET_VECTORIZE_BUILTINS)
5882 return NULL_TREE;
5884 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5885 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5886 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5887 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5889 enum rs6000_builtins fn
5890 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5891 switch (fn)
5893 case RS6000_BUILTIN_RSQRTF:
5894 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5895 && out_mode == SFmode && out_n == 4
5896 && in_mode == SFmode && in_n == 4)
5897 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5898 break;
5899 case RS6000_BUILTIN_RSQRT:
5900 if (VECTOR_UNIT_VSX_P (V2DFmode)
5901 && out_mode == DFmode && out_n == 2
5902 && in_mode == DFmode && in_n == 2)
5903 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5904 break;
5905 case RS6000_BUILTIN_RECIPF:
5906 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5907 && out_mode == SFmode && out_n == 4
5908 && in_mode == SFmode && in_n == 4)
5909 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5910 break;
5911 case RS6000_BUILTIN_RECIP:
5912 if (VECTOR_UNIT_VSX_P (V2DFmode)
5913 && out_mode == DFmode && out_n == 2
5914 && in_mode == DFmode && in_n == 2)
5915 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5916 break;
5917 default:
5918 break;
5920 return NULL_TREE;
5923 /* Default CPU string for rs6000*_file_start functions. */
5924 static const char *rs6000_default_cpu;
5926 /* Do anything needed at the start of the asm file. */
5928 static void
5929 rs6000_file_start (void)
5931 char buffer[80];
5932 const char *start = buffer;
5933 FILE *file = asm_out_file;
5935 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5937 default_file_start ();
5939 if (flag_verbose_asm)
5941 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5943 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5945 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5946 start = "";
5949 if (global_options_set.x_rs6000_cpu_index)
5951 fprintf (file, "%s -mcpu=%s", start,
5952 processor_target_table[rs6000_cpu_index].name);
5953 start = "";
5956 if (global_options_set.x_rs6000_tune_index)
5958 fprintf (file, "%s -mtune=%s", start,
5959 processor_target_table[rs6000_tune_index].name);
5960 start = "";
5963 if (PPC405_ERRATUM77)
5965 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5966 start = "";
5969 #ifdef USING_ELFOS_H
5970 switch (rs6000_sdata)
5972 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5973 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5974 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5975 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5978 if (rs6000_sdata && g_switch_value)
5980 fprintf (file, "%s -G %d", start,
5981 g_switch_value);
5982 start = "";
5984 #endif
5986 if (*start == '\0')
5987 putc ('\n', file);
5990 #ifdef USING_ELFOS_H
5991 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5992 && !global_options_set.x_rs6000_cpu_index)
5994 fputs ("\t.machine ", asm_out_file);
5995 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5996 fputs ("power9\n", asm_out_file);
5997 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5998 fputs ("power8\n", asm_out_file);
5999 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6000 fputs ("power7\n", asm_out_file);
6001 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6002 fputs ("power6\n", asm_out_file);
6003 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6004 fputs ("power5\n", asm_out_file);
6005 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6006 fputs ("power4\n", asm_out_file);
6007 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6008 fputs ("ppc64\n", asm_out_file);
6009 else
6010 fputs ("ppc\n", asm_out_file);
6012 #endif
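/* As a rough guide to the cascade above: each test keys on a flag that
   first appeared at that ISA level, e.g. OPTION_MASK_DIRECT_MOVE implies
   at least ".machine power8" and OPTION_MASK_POPCNTD at least
   ".machine power7".  */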
6014 if (DEFAULT_ABI == ABI_ELFv2)
6015 fprintf (file, "\t.abiversion 2\n");
6019 /* Return nonzero if this function is known to have a null epilogue. */
6022 direct_return (void)
6024 if (reload_completed)
6026 rs6000_stack_t *info = rs6000_stack_info ();
6028 if (info->first_gp_reg_save == 32
6029 && info->first_fp_reg_save == 64
6030 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6031 && ! info->lr_save_p
6032 && ! info->cr_save_p
6033 && info->vrsave_size == 0
6034 && ! info->push_p)
6035 return 1;
6038 return 0;
6041 /* Return the number of instructions it takes to form a constant in an
6042 integer register. */
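/* For example, on a 64-bit target: 0x7fff takes 1 insn (li), 0x12345678
   takes 2 (lis; ori), and a full 64-bit constant such as
   0x123456789abcdef0 takes 5 (lis; ori; sldi 32; oris; ori).  */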
6045 num_insns_constant_wide (HOST_WIDE_INT value)
6047 /* signed constant loadable with addi */
6048 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6049 return 1;
6051 /* constant loadable with addis */
6052 else if ((value & 0xffff) == 0
6053 && (value >> 31 == -1 || value >> 31 == 0))
6054 return 1;
6056 else if (TARGET_POWERPC64)
6058 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6059 HOST_WIDE_INT high = value >> 31;
6061 if (high == 0 || high == -1)
6062 return 2;
6064 high >>= 1;
6066 if (low == 0)
6067 return num_insns_constant_wide (high) + 1;
6068 else if (high == 0)
6069 return num_insns_constant_wide (low) + 1;
6070 else
6071 return (num_insns_constant_wide (high)
6072 + num_insns_constant_wide (low) + 1);
6075 else
6076 return 2;
6080 num_insns_constant (rtx op, machine_mode mode)
6082 HOST_WIDE_INT low, high;
6084 switch (GET_CODE (op))
6086 case CONST_INT:
6087 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6088 && rs6000_is_valid_and_mask (op, mode))
6089 return 2;
6090 else
6091 return num_insns_constant_wide (INTVAL (op));
6093 case CONST_WIDE_INT:
6095 int i;
6096 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6097 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6098 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6099 return ins;
6102 case CONST_DOUBLE:
6103 if (mode == SFmode || mode == SDmode)
6105 long l;
6107 if (DECIMAL_FLOAT_MODE_P (mode))
6108 REAL_VALUE_TO_TARGET_DECIMAL32
6109 (*CONST_DOUBLE_REAL_VALUE (op), l);
6110 else
6111 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6112 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6115 long l[2];
6116 if (DECIMAL_FLOAT_MODE_P (mode))
6117 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6118 else
6119 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6120 high = l[WORDS_BIG_ENDIAN == 0];
6121 low = l[WORDS_BIG_ENDIAN != 0];
6123 if (TARGET_32BIT)
6124 return (num_insns_constant_wide (low)
6125 + num_insns_constant_wide (high));
6126 else
6128 if ((high == 0 && low >= 0)
6129 || (high == -1 && low < 0))
6130 return num_insns_constant_wide (low);
6132 else if (rs6000_is_valid_and_mask (op, mode))
6133 return 2;
6135 else if (low == 0)
6136 return num_insns_constant_wide (high) + 1;
6138 else
6139 return (num_insns_constant_wide (high)
6140 + num_insns_constant_wide (low) + 1);
6143 default:
6144 gcc_unreachable ();
6148 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6149 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6150 corresponding element of the vector, but for V4SFmode and V2SFmode,
6151 the corresponding "float" is interpreted as an SImode integer. */
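/* For example, a V4SFmode element holding 1.0f is returned as
   0x3f800000.  */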
6153 HOST_WIDE_INT
6154 const_vector_elt_as_int (rtx op, unsigned int elt)
6156 rtx tmp;
6158 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6159 gcc_assert (GET_MODE (op) != V2DImode
6160 && GET_MODE (op) != V2DFmode);
6162 tmp = CONST_VECTOR_ELT (op, elt);
6163 if (GET_MODE (op) == V4SFmode
6164 || GET_MODE (op) == V2SFmode)
6165 tmp = gen_lowpart (SImode, tmp);
6166 return INTVAL (tmp);
6169 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6170 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6171 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6172 all items are set to the same value and contain COPIES replicas of the
6173 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
6174 operand and the others are set to the value of the operand's msb. */
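/* For instance, the V4SImode constant { 5, 5, 5, 5 } is a vspltisw 5
   with STEP = 1 and COPIES = 1, while { 0x00050005, 0x00050005,
   0x00050005, 0x00050005 } is a vspltish 5 with COPIES = 2 (each word
   holds two copies of the 16-bit splat value).  */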
6176 static bool
6177 vspltis_constant (rtx op, unsigned step, unsigned copies)
6179 machine_mode mode = GET_MODE (op);
6180 machine_mode inner = GET_MODE_INNER (mode);
6182 unsigned i;
6183 unsigned nunits;
6184 unsigned bitsize;
6185 unsigned mask;
6187 HOST_WIDE_INT val;
6188 HOST_WIDE_INT splat_val;
6189 HOST_WIDE_INT msb_val;
6191 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6192 return false;
6194 nunits = GET_MODE_NUNITS (mode);
6195 bitsize = GET_MODE_BITSIZE (inner);
6196 mask = GET_MODE_MASK (inner);
6198 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6199 splat_val = val;
6200 msb_val = val >= 0 ? 0 : -1;
6202 /* Construct the value to be splatted, if possible. If not, return 0. */
6203 for (i = 2; i <= copies; i *= 2)
6205 HOST_WIDE_INT small_val;
6206 bitsize /= 2;
6207 small_val = splat_val >> bitsize;
6208 mask >>= bitsize;
6209 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6210 return false;
6211 splat_val = small_val;
6214 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6215 if (EASY_VECTOR_15 (splat_val))
6218 /* Also check if we can splat, and then add the result to itself. Do so if
6219 the value is positive, or if the splat instruction is using OP's mode;
6220 for splat_val < 0, the splat and the add should use the same mode. */
6221 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6222 && (splat_val >= 0 || (step == 1 && copies == 1)))
6225 /* Also check if we are loading up the most significant bit, which can be
6226 done by loading up -1 and shifting the value left by -1. */
6227 else if (EASY_VECTOR_MSB (splat_val, inner))
6230 else
6231 return false;
6233 /* Check if VAL is present in every STEP-th element, and the
6234 other elements are filled with its most significant bit. */
6235 for (i = 1; i < nunits; ++i)
6237 HOST_WIDE_INT desired_val;
6238 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6239 if ((i & (step - 1)) == 0)
6240 desired_val = val;
6241 else
6242 desired_val = msb_val;
6244 if (desired_val != const_vector_elt_as_int (op, elt))
6245 return false;
6248 return true;
6251 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6252 instruction, filling in the bottom elements with 0 or -1.
6254 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6255 for the number of zeroes to shift in, or negative for the number of 0xff
6256 bytes to shift in.
6258 OP is a CONST_VECTOR. */
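/* For example (big-endian element order): the V4SImode constant
   { 5, 5, 0, 0 } returns 8 (vspltisw 5, then shift in 8 zero bytes),
   while { 7, -1, -1, -1 } returns -12 (vspltisw 7, then shift in
   12 0xff bytes).  */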
6261 vspltis_shifted (rtx op)
6263 machine_mode mode = GET_MODE (op);
6264 machine_mode inner = GET_MODE_INNER (mode);
6266 unsigned i, j;
6267 unsigned nunits;
6268 unsigned mask;
6270 HOST_WIDE_INT val;
6272 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6273 return false;
6275 /* We need to create pseudo registers to do the shift, so don't recognize
6276 shift vector constants after reload. */
6277 if (!can_create_pseudo_p ())
6278 return false;
6280 nunits = GET_MODE_NUNITS (mode);
6281 mask = GET_MODE_MASK (inner);
6283 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6285 /* Check if the value can really be the operand of a vspltis[bhw]. */
6286 if (EASY_VECTOR_15 (val))
6289 /* Also check if we are loading up the most significant bit, which can be done
6290 by loading up -1 and shifting the value left by -1. */
6291 else if (EASY_VECTOR_MSB (val, inner))
6294 else
6295 return 0;
6297 /* Check if VAL is present in every element, until we find elements
6298 that are 0 or all 1 bits. */
6299 for (i = 1; i < nunits; ++i)
6301 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6302 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6304 /* If the value isn't the splat value, check for the remaining elements
6305 being 0/-1. */
6306 if (val != elt_val)
6308 if (elt_val == 0)
6310 for (j = i+1; j < nunits; ++j)
6312 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6313 if (const_vector_elt_as_int (op, elt2) != 0)
6314 return 0;
6317 return (nunits - i) * GET_MODE_SIZE (inner);
6320 else if ((elt_val & mask) == mask)
6322 for (j = i+1; j < nunits; ++j)
6324 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6325 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6326 return 0;
6329 return -((nunits - i) * GET_MODE_SIZE (inner));
6332 else
6333 return 0;
6337 /* If all elements are equal, we don't need to do VSLDOI. */
6338 return 0;
6342 /* Return true if OP is of the given MODE and can be synthesized
6343 with a vspltisb, vspltish or vspltisw. */
6345 bool
6346 easy_altivec_constant (rtx op, machine_mode mode)
6348 unsigned step, copies;
6350 if (mode == VOIDmode)
6351 mode = GET_MODE (op);
6352 else if (mode != GET_MODE (op))
6353 return false;
6355 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6356 constants. */
6357 if (mode == V2DFmode)
6358 return zero_constant (op, mode);
6360 else if (mode == V2DImode)
6362 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6363 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6364 return false;
6366 if (zero_constant (op, mode))
6367 return true;
6369 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6370 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6371 return true;
6373 return false;
6376 /* V1TImode is a special container for TImode. Ignore for now. */
6377 else if (mode == V1TImode)
6378 return false;
6380 /* Start with a vspltisw. */
6381 step = GET_MODE_NUNITS (mode) / 4;
6382 copies = 1;
6384 if (vspltis_constant (op, step, copies))
6385 return true;
6387 /* Then try with a vspltish. */
6388 if (step == 1)
6389 copies <<= 1;
6390 else
6391 step >>= 1;
6393 if (vspltis_constant (op, step, copies))
6394 return true;
6396 /* And finally a vspltisb. */
6397 if (step == 1)
6398 copies <<= 1;
6399 else
6400 step >>= 1;
6402 if (vspltis_constant (op, step, copies))
6403 return true;
6405 if (vspltis_shifted (op) != 0)
6406 return true;
6408 return false;
6411 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6412 result is OP. Abort if it is not possible. */
6415 gen_easy_altivec_constant (rtx op)
6417 machine_mode mode = GET_MODE (op);
6418 int nunits = GET_MODE_NUNITS (mode);
6419 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6420 unsigned step = nunits / 4;
6421 unsigned copies = 1;
6423 /* Start with a vspltisw. */
6424 if (vspltis_constant (op, step, copies))
6425 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6427 /* Then try with a vspltish. */
6428 if (step == 1)
6429 copies <<= 1;
6430 else
6431 step >>= 1;
6433 if (vspltis_constant (op, step, copies))
6434 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6436 /* And finally a vspltisb. */
6437 if (step == 1)
6438 copies <<= 1;
6439 else
6440 step >>= 1;
6442 if (vspltis_constant (op, step, copies))
6443 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6445 gcc_unreachable ();
6448 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6449 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6451 Return the number of instructions needed (1 or 2) through the address
6452 pointed to by NUM_INSNS_PTR.
6454 Return the constant that is being split via CONSTANT_PTR. */
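/* Some illustrative cases: a V16QImode splat of 37 needs 1 insn
   (xxspltib); a V2DImode splat of 37 needs 2 (xxspltib plus a sign
   extend); a V4SImode splat of 5 is rejected below, since a single
   vspltisw is preferable; and splats of 0 or -1 always need just 1.  */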
6456 bool
6457 xxspltib_constant_p (rtx op,
6458 machine_mode mode,
6459 int *num_insns_ptr,
6460 int *constant_ptr)
6462 size_t nunits = GET_MODE_NUNITS (mode);
6463 size_t i;
6464 HOST_WIDE_INT value;
6465 rtx element;
6467 /* Set the returned values to out of bound values. */
6468 *num_insns_ptr = -1;
6469 *constant_ptr = 256;
6471 if (!TARGET_P9_VECTOR)
6472 return false;
6474 if (mode == VOIDmode)
6475 mode = GET_MODE (op);
6477 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6478 return false;
6480 /* Handle (vec_duplicate <constant>). */
6481 if (GET_CODE (op) == VEC_DUPLICATE)
6483 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6484 && mode != V2DImode)
6485 return false;
6487 element = XEXP (op, 0);
6488 if (!CONST_INT_P (element))
6489 return false;
6491 value = INTVAL (element);
6492 if (!IN_RANGE (value, -128, 127))
6493 return false;
6496 /* Handle (const_vector [...]). */
6497 else if (GET_CODE (op) == CONST_VECTOR)
6499 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6500 && mode != V2DImode)
6501 return false;
6503 element = CONST_VECTOR_ELT (op, 0);
6504 if (!CONST_INT_P (element))
6505 return false;
6507 value = INTVAL (element);
6508 if (!IN_RANGE (value, -128, 127))
6509 return false;
6511 for (i = 1; i < nunits; i++)
6513 element = CONST_VECTOR_ELT (op, i);
6514 if (!CONST_INT_P (element))
6515 return false;
6517 if (value != INTVAL (element))
6518 return false;
6522 /* Handle integer constants being loaded into the upper part of the VSX
6523 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6524 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6525 else if (CONST_INT_P (op))
6527 if (!SCALAR_INT_MODE_P (mode))
6528 return false;
6530 value = INTVAL (op);
6531 if (!IN_RANGE (value, -128, 127))
6532 return false;
6534 if (!IN_RANGE (value, -1, 0))
6536 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6537 return false;
6539 if (EASY_VECTOR_15 (value))
6540 return false;
6544 else
6545 return false;
6547 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6548 sign extend. Special case 0/-1 to allow getting any VSX register instead
6549 of an Altivec register. */
6550 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6551 && EASY_VECTOR_15 (value))
6552 return false;
6554 /* Return # of instructions and the constant byte for XXSPLTIB. */
6555 if (mode == V16QImode)
6556 *num_insns_ptr = 1;
6558 else if (IN_RANGE (value, -1, 0))
6559 *num_insns_ptr = 1;
6561 else
6562 *num_insns_ptr = 2;
6564 *constant_ptr = (int) value;
6565 return true;
6568 const char *
6569 output_vec_const_move (rtx *operands)
6571 int cst, cst2, shift;
6572 machine_mode mode;
6573 rtx dest, vec;
6575 dest = operands[0];
6576 vec = operands[1];
6577 mode = GET_MODE (dest);
6579 if (TARGET_VSX)
6581 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6582 int xxspltib_value = 256;
6583 int num_insns = -1;
6585 if (zero_constant (vec, mode))
6587 if (TARGET_P9_VECTOR)
6588 return "xxspltib %x0,0";
6590 else if (dest_vmx_p)
6591 return "vspltisw %0,0";
6593 else
6594 return "xxlxor %x0,%x0,%x0";
6597 if (all_ones_constant (vec, mode))
6599 if (TARGET_P9_VECTOR)
6600 return "xxspltib %x0,255";
6602 else if (dest_vmx_p)
6603 return "vspltisw %0,-1";
6605 else if (TARGET_P8_VECTOR)
6606 return "xxlorc %x0,%x0,%x0";
6608 else
6609 gcc_unreachable ();
6612 if (TARGET_P9_VECTOR
6613 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6615 if (num_insns == 1)
6617 operands[2] = GEN_INT (xxspltib_value & 0xff);
6618 return "xxspltib %x0,%2";
6621 return "#";
6625 if (TARGET_ALTIVEC)
6627 rtx splat_vec;
6629 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6630 if (zero_constant (vec, mode))
6631 return "vspltisw %0,0";
6633 if (all_ones_constant (vec, mode))
6634 return "vspltisw %0,-1";
6636 /* Do we need to construct a value using VSLDOI? */
6637 shift = vspltis_shifted (vec);
6638 if (shift != 0)
6639 return "#";
6641 splat_vec = gen_easy_altivec_constant (vec);
6642 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6643 operands[1] = XEXP (splat_vec, 0);
6644 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6645 return "#";
6647 switch (GET_MODE (splat_vec))
6649 case V4SImode:
6650 return "vspltisw %0,%1";
6652 case V8HImode:
6653 return "vspltish %0,%1";
6655 case V16QImode:
6656 return "vspltisb %0,%1";
6658 default:
6659 gcc_unreachable ();
6663 gcc_assert (TARGET_SPE);
6665 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6666 pattern of V1DI, V4HI, and V2SF.
6668 FIXME: We should probably return # and add post reload
6669 splitters for these, but this way is so easy ;-). */
6670 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6671 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6672 operands[1] = CONST_VECTOR_ELT (vec, 0);
6673 operands[2] = CONST_VECTOR_ELT (vec, 1);
6674 if (cst == cst2)
6675 return "li %0,%1\n\tevmergelo %0,%0,%0";
6676 else if (WORDS_BIG_ENDIAN)
6677 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6678 else
6679 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6682 /* Initialize the PAIRED vector TARGET to VALS. */
6684 void
6685 paired_expand_vector_init (rtx target, rtx vals)
6687 machine_mode mode = GET_MODE (target);
6688 int n_elts = GET_MODE_NUNITS (mode);
6689 int n_var = 0;
6690 rtx x, new_rtx, tmp, constant_op, op1, op2;
6691 int i;
6693 for (i = 0; i < n_elts; ++i)
6695 x = XVECEXP (vals, 0, i);
6696 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6697 ++n_var;
6699 if (n_var == 0)
6701 /* Load from constant pool. */
6702 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6703 return;
6706 if (n_var == 2)
6708 /* The vector is initialized only with non-constants. */
6709 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6710 XVECEXP (vals, 0, 1));
6712 emit_move_insn (target, new_rtx);
6713 return;
6716 /* One field is non-constant and the other one is a constant. Load the
6717 constant from the constant pool and use the ps_merge instruction to
6718 construct the whole vector. */
6719 op1 = XVECEXP (vals, 0, 0);
6720 op2 = XVECEXP (vals, 0, 1);
6722 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6724 tmp = gen_reg_rtx (GET_MODE (constant_op));
6725 emit_move_insn (tmp, constant_op);
6727 if (CONSTANT_P (op1))
6728 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6729 else
6730 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6732 emit_move_insn (target, new_rtx);
6735 void
6736 paired_expand_vector_move (rtx operands[])
6738 rtx op0 = operands[0], op1 = operands[1];
6740 emit_move_insn (op0, op1);
6743 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6744 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6745 operands of the relation operation RCODE. This is a recursive
6746 function. */
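/* A sketch of the recursion: GE is the base case, computing
   dest = (cc_op0 - cc_op1 >= 0) ? op0 : op1 with a subtract and a
   select; LT is GE with the select arms swapped, LE is GE with the
   compare operands swapped, and the rest reduce similarly.  */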
6748 static void
6749 paired_emit_vector_compare (enum rtx_code rcode,
6750 rtx dest, rtx op0, rtx op1,
6751 rtx cc_op0, rtx cc_op1)
6753 rtx tmp = gen_reg_rtx (V2SFmode);
6754 rtx tmp1, max, min;
6756 gcc_assert (TARGET_PAIRED_FLOAT);
6757 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6759 switch (rcode)
6761 case LT:
6762 case LTU:
6763 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6764 return;
6765 case GE:
6766 case GEU:
6767 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6768 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6769 return;
6770 case LE:
6771 case LEU:
6772 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6773 return;
6774 case GT:
6775 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6776 return;
6777 case EQ:
6778 tmp1 = gen_reg_rtx (V2SFmode);
6779 max = gen_reg_rtx (V2SFmode);
6780 min = gen_reg_rtx (V2SFmode);
6783 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6784 emit_insn (gen_selv2sf4
6785 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6786 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6787 emit_insn (gen_selv2sf4
6788 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6789 emit_insn (gen_subv2sf3 (tmp1, min, max));
6790 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6791 return;
6792 case NE:
6793 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6794 return;
6795 case UNLE:
6796 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6797 return;
6798 case UNLT:
6799 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6800 return;
6801 case UNGE:
6802 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6803 return;
6804 case UNGT:
6805 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6806 return;
6807 default:
6808 gcc_unreachable ();
6811 return;
6814 /* Emit vector conditional expression.
6815 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6816 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6819 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6820 rtx cond, rtx cc_op0, rtx cc_op1)
6822 enum rtx_code rcode = GET_CODE (cond);
6824 if (!TARGET_PAIRED_FLOAT)
6825 return 0;
6827 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6829 return 1;
6832 /* Initialize vector TARGET to VALS. */
6834 void
6835 rs6000_expand_vector_init (rtx target, rtx vals)
6837 machine_mode mode = GET_MODE (target);
6838 machine_mode inner_mode = GET_MODE_INNER (mode);
6839 int n_elts = GET_MODE_NUNITS (mode);
6840 int n_var = 0, one_var = -1;
6841 bool all_same = true, all_const_zero = true;
6842 rtx x, mem;
6843 int i;
6845 for (i = 0; i < n_elts; ++i)
6847 x = XVECEXP (vals, 0, i);
6848 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6849 ++n_var, one_var = i;
6850 else if (x != CONST0_RTX (inner_mode))
6851 all_const_zero = false;
6853 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6854 all_same = false;
6857 if (n_var == 0)
6859 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6860 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6861 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6863 /* Zero register. */
6864 emit_move_insn (target, CONST0_RTX (mode));
6865 return;
6867 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6869 /* Splat immediate. */
6870 emit_insn (gen_rtx_SET (target, const_vec));
6871 return;
6873 else
6875 /* Load from constant pool. */
6876 emit_move_insn (target, const_vec);
6877 return;
6881 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6882 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6884 rtx op0 = XVECEXP (vals, 0, 0);
6885 rtx op1 = XVECEXP (vals, 0, 1);
6886 if (all_same)
6888 if (!MEM_P (op0) && !REG_P (op0))
6889 op0 = force_reg (inner_mode, op0);
6890 if (mode == V2DFmode)
6891 emit_insn (gen_vsx_splat_v2df (target, op0));
6892 else
6893 emit_insn (gen_vsx_splat_v2di (target, op0));
6895 else
6897 op0 = force_reg (inner_mode, op0);
6898 op1 = force_reg (inner_mode, op1);
6899 if (mode == V2DFmode)
6900 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6901 else
6902 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6904 return;
6907 /* Special case initializing vector int if we are on 64-bit systems with
6908 direct move or we have the ISA 3.0 instructions. */
6909 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6910 && TARGET_DIRECT_MOVE_64BIT)
6912 if (all_same)
6914 rtx element0 = XVECEXP (vals, 0, 0);
6915 if (MEM_P (element0))
6916 element0 = rs6000_address_for_fpconvert (element0);
6917 else
6918 element0 = force_reg (SImode, element0);
6920 if (TARGET_P9_VECTOR)
6921 emit_insn (gen_vsx_splat_v4si (target, element0));
6922 else
6924 rtx tmp = gen_reg_rtx (DImode);
6925 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6926 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6928 return;
6930 else
6932 rtx elements[4];
6933 size_t i;
6935 for (i = 0; i < 4; i++)
6937 elements[i] = XVECEXP (vals, 0, i);
6938 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6939 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6942 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6943 elements[2], elements[3]));
6944 return;
6948 /* With single-precision floating point on VSX, we know that internally
6949 single precision is actually represented as a double. Either make two
6950 V2DF vectors and convert these vectors to single precision, or do one
6951 conversion and splat the result to the other elements. */
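/* Roughly, for { a, b, c, d } on a big-endian Power8: dbl_even = { a, c }
   and dbl_odd = { b, d } are converted with xvcvdpsp, which leaves the
   single-precision results in words 0 and 2, and vmrgew then interleaves
   the two vectors back into { a, b, c, d }.  */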
6952 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6954 if (all_same)
6956 rtx element0 = XVECEXP (vals, 0, 0);
6958 if (TARGET_P9_VECTOR)
6960 if (MEM_P (element0))
6961 element0 = rs6000_address_for_fpconvert (element0);
6963 emit_insn (gen_vsx_splat_v4sf (target, element0));
6966 else
6968 rtx freg = gen_reg_rtx (V4SFmode);
6969 rtx sreg = force_reg (SFmode, element0);
6970 rtx cvt = (TARGET_XSCVDPSPN
6971 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6972 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6974 emit_insn (cvt);
6975 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6976 const0_rtx));
6979 else
6981 rtx dbl_even = gen_reg_rtx (V2DFmode);
6982 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6983 rtx flt_even = gen_reg_rtx (V4SFmode);
6984 rtx flt_odd = gen_reg_rtx (V4SFmode);
6985 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6986 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6987 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6988 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6990 /* Use VMRGEW if we can instead of doing a permute. */
6991 if (TARGET_P8_VECTOR)
6993 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6994 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6995 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6996 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6997 if (BYTES_BIG_ENDIAN)
6998 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6999 else
7000 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7002 else
7004 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7005 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7006 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7007 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7008 rs6000_expand_extract_even (target, flt_even, flt_odd);
7011 return;
7014 /* Special case initializing vector short/char that are splats if we are on
7015 64-bit systems with direct move. */
7016 if (all_same && TARGET_DIRECT_MOVE_64BIT
7017 && (mode == V16QImode || mode == V8HImode))
7019 rtx op0 = XVECEXP (vals, 0, 0);
7020 rtx di_tmp = gen_reg_rtx (DImode);
7022 if (!REG_P (op0))
7023 op0 = force_reg (GET_MODE_INNER (mode), op0);
7025 if (mode == V16QImode)
7027 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7028 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7029 return;
7032 if (mode == V8HImode)
7034 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7035 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7036 return;
7040 /* Store value to stack temp. Load vector element. Splat. However, splat
7041 of 64-bit items is not supported on Altivec. */
7042 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7044 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7045 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7046 XVECEXP (vals, 0, 0));
7047 x = gen_rtx_UNSPEC (VOIDmode,
7048 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7049 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7050 gen_rtvec (2,
7051 gen_rtx_SET (target, mem),
7052 x)));
7053 x = gen_rtx_VEC_SELECT (inner_mode, target,
7054 gen_rtx_PARALLEL (VOIDmode,
7055 gen_rtvec (1, const0_rtx)));
7056 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7057 return;
7060 /* One field is non-constant. Load constant then overwrite
7061 varying field. */
7062 if (n_var == 1)
7064 rtx copy = copy_rtx (vals);
7066 /* Load constant part of vector, substitute neighboring value for
7067 varying element. */
7068 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7069 rs6000_expand_vector_init (target, copy);
7071 /* Insert variable. */
7072 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7073 return;
7076 /* Construct the vector in memory one field at a time
7077 and load the whole vector. */
7078 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7079 for (i = 0; i < n_elts; i++)
7080 emit_move_insn (adjust_address_nv (mem, inner_mode,
7081 i * GET_MODE_SIZE (inner_mode)),
7082 XVECEXP (vals, 0, i));
7083 emit_move_insn (target, mem);
7086 /* Set field ELT of TARGET to VAL. */
7088 void
7089 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7091 machine_mode mode = GET_MODE (target);
7092 machine_mode inner_mode = GET_MODE_INNER (mode);
7093 rtx reg = gen_reg_rtx (mode);
7094 rtx mask, mem, x;
7095 int width = GET_MODE_SIZE (inner_mode);
7096 int i;
7098 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7100 rtx (*set_func) (rtx, rtx, rtx, rtx)
7101 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
7102 emit_insn (set_func (target, target, val, GEN_INT (elt)));
7103 return;
7106 /* Simplify setting single element vectors like V1TImode. */
7107 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7109 emit_move_insn (target, gen_lowpart (mode, val));
7110 return;
7113 /* Load single variable value. */
7114 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7115 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7116 x = gen_rtx_UNSPEC (VOIDmode,
7117 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7118 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7119 gen_rtvec (2,
7120 gen_rtx_SET (reg, mem),
7121 x)));
7123 /* Linear sequence. */
7124 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7125 for (i = 0; i < 16; ++i)
7126 XVECEXP (mask, 0, i) = GEN_INT (i);
7128 /* Set permute mask to insert element into target. */
7129 for (i = 0; i < width; ++i)
7130 XVECEXP (mask, 0, elt*width + i)
7131 = GEN_INT (i + 0x10);
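/* For example (V4SImode, ELT == 2, so WIDTH == 4) the selector becomes
   { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15 }: bytes
   8-11 (0x10-0x13) pick up the new value from the second permute
   input, while the remaining bytes copy TARGET through unchanged.  */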
7132 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7134 if (BYTES_BIG_ENDIAN)
7135 x = gen_rtx_UNSPEC (mode,
7136 gen_rtvec (3, target, reg,
7137 force_reg (V16QImode, x)),
7138 UNSPEC_VPERM);
7139 else
7141 if (TARGET_P9_VECTOR)
7142 x = gen_rtx_UNSPEC (mode,
7143 gen_rtvec (3, target, reg,
7144 force_reg (V16QImode, x)),
7145 UNSPEC_VPERMR);
7146 else
7148 /* Invert selector. We prefer to generate VNAND on P8 so
7149 that future fusion opportunities can kick in, but must
7150 generate VNOR elsewhere. */
7151 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7152 rtx iorx = (TARGET_P8_VECTOR
7153 ? gen_rtx_IOR (V16QImode, notx, notx)
7154 : gen_rtx_AND (V16QImode, notx, notx));
7155 rtx tmp = gen_reg_rtx (V16QImode);
7156 emit_insn (gen_rtx_SET (tmp, iorx));
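/* Both forms compute ~x when the two operands are the same register:
   (ior (not x) (not x)) is the canonical RTL the vnand pattern
   matches, and (and (not x) (not x)) is the form vnor matches.  */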
7158 /* Permute with operands reversed and adjusted selector. */
7159 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7160 UNSPEC_VPERM);
7164 emit_insn (gen_rtx_SET (target, x));
7167 /* Extract field ELT from VEC into TARGET. */
7169 void
7170 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7172 machine_mode mode = GET_MODE (vec);
7173 machine_mode inner_mode = GET_MODE_INNER (mode);
7174 rtx mem;
7176 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7178 switch (mode)
7180 default:
7181 break;
7182 case V1TImode:
7183 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7184 emit_move_insn (target, gen_lowpart (TImode, vec));
7185 break;
7186 case V2DFmode:
7187 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7188 return;
7189 case V2DImode:
7190 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7191 return;
7192 case V4SFmode:
7193 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7194 return;
7195 case V16QImode:
7196 if (TARGET_DIRECT_MOVE_64BIT)
7198 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7199 return;
7201 else
7202 break;
7203 case V8HImode:
7204 if (TARGET_DIRECT_MOVE_64BIT)
7206 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7207 return;
7209 else
7210 break;
7211 case V4SImode:
7212 if (TARGET_DIRECT_MOVE_64BIT)
7214 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7215 return;
7217 break;
7220 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7221 && TARGET_DIRECT_MOVE_64BIT)
7223 if (GET_MODE (elt) != DImode)
7225 rtx tmp = gen_reg_rtx (DImode);
7226 convert_move (tmp, elt, 0);
7227 elt = tmp;
7230 switch (mode)
7232 case V2DFmode:
7233 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7234 return;
7236 case V2DImode:
7237 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7238 return;
7240 case V4SFmode:
7241 if (TARGET_UPPER_REGS_SF)
7243 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7244 return;
7246 break;
7248 case V4SImode:
7249 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7250 return;
7252 case V8HImode:
7253 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7254 return;
7256 case V16QImode:
7257 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7258 return;
7260 default:
7261 gcc_unreachable ();
7265 gcc_assert (CONST_INT_P (elt));
7267 /* Allocate mode-sized buffer. */
7268 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7270 emit_move_insn (mem, vec);
7272 /* Add offset to field within buffer matching vector element. */
7273 mem = adjust_address_nv (mem, inner_mode,
7274 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7276 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
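/* E.g., extracting element 3 of a V8HImode vector spills the vector
   to the stack temporary and reloads the HImode value from byte
   offset 3 * 2 == 6.  */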
7279 /* Helper function to return the register number of an RTX. */
7280 static inline int
7281 regno_or_subregno (rtx op)
7283 if (REG_P (op))
7284 return REGNO (op);
7285 else if (SUBREG_P (op))
7286 return subreg_regno (op);
7287 else
7288 gcc_unreachable ();
7291 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7292 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7293 temporary (BASE_TMP) to fix up the address. Return the new memory address
7294 that is valid for reads or writes to a given register (SCALAR_REG). */
7296 static rtx
7297 rs6000_adjust_vec_address (rtx scalar_reg,
7298 rtx mem,
7299 rtx element,
7300 rtx base_tmp,
7301 machine_mode scalar_mode)
7303 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7304 rtx addr = XEXP (mem, 0);
7305 rtx element_offset;
7306 rtx new_addr;
7307 bool valid_addr_p;
7309 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7310 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7312 /* Calculate what we need to add to the address to get the element
7313 address. */
7314 if (CONST_INT_P (element))
7315 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7316 else
7318 int byte_shift = exact_log2 (scalar_size);
7319 gcc_assert (byte_shift >= 0);
7321 if (byte_shift == 0)
7322 element_offset = element;
7324 else
7326 if (TARGET_POWERPC64)
7327 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7328 else
7329 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7331 element_offset = base_tmp;
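/* E.g., for V4SImode, SCALAR_SIZE is 4 so BYTE_SHIFT is 2, and a
   variable element number N becomes the byte offset N << 2.  */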
7335 /* Create the new address pointing to the element within the vector. If we
7336 are adding 0, we don't have to change the address. */
7337 if (element_offset == const0_rtx)
7338 new_addr = addr;
7340 /* A simple indirect address can be converted into a reg + offset
7341 address. */
7342 else if (REG_P (addr) || SUBREG_P (addr))
7343 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7345 /* Optimize D-FORM addresses with a constant offset and a constant element
7346 number, to include the element offset in the address directly. */
7347 else if (GET_CODE (addr) == PLUS)
7349 rtx op0 = XEXP (addr, 0);
7350 rtx op1 = XEXP (addr, 1);
7351 rtx insn;
7353 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7354 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7356 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7357 rtx offset_rtx = GEN_INT (offset);
7359 if (IN_RANGE (offset, -32768, 32767)
7360 && (scalar_size < 8 || (offset & 0x3) == 0))
7361 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7362 else
7364 emit_move_insn (base_tmp, offset_rtx);
7365 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
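/* As a worked example: element 1 of a V2DFmode vector at r9+16 folds
   to offset 16 + 8 == 24, which is in the signed 16-bit range and
   word-aligned, so it is used directly; an out-of-range combined
   offset is instead loaded into BASE_TMP and added as a register.  */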
7368 else
7370 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7371 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7373 /* Note that ADDI requires the register being added to be a base
7374 register. If the register is R0, load it into the temporary
7375 and do the add there. */
7376 if (op1_reg_p
7377 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7379 insn = gen_add3_insn (base_tmp, op1, element_offset);
7380 gcc_assert (insn != NULL_RTX);
7381 emit_insn (insn);
7384 else if (ele_reg_p
7385 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7387 insn = gen_add3_insn (base_tmp, element_offset, op1);
7388 gcc_assert (insn != NULL_RTX);
7389 emit_insn (insn);
7392 else
7394 emit_move_insn (base_tmp, op1);
7395 emit_insn (gen_add2_insn (base_tmp, element_offset));
7398 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7402 else
7404 emit_move_insn (base_tmp, addr);
7405 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7408 /* If we have a PLUS, we need to see whether the particular register class
7409 allows for D-FORM or X-FORM addressing. */
7410 if (GET_CODE (new_addr) == PLUS)
7412 rtx op1 = XEXP (new_addr, 1);
7413 addr_mask_type addr_mask;
7414 int scalar_regno = regno_or_subregno (scalar_reg);
7416 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7417 if (INT_REGNO_P (scalar_regno))
7418 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7420 else if (FP_REGNO_P (scalar_regno))
7421 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7423 else if (ALTIVEC_REGNO_P (scalar_regno))
7424 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7426 else
7427 gcc_unreachable ();
7429 if (REG_P (op1) || SUBREG_P (op1))
7430 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7431 else
7432 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7435 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7436 valid_addr_p = true;
7438 else
7439 valid_addr_p = false;
7441 if (!valid_addr_p)
7443 emit_move_insn (base_tmp, new_addr);
7444 new_addr = base_tmp;
7447 return change_address (mem, scalar_mode, new_addr);
7450 /* Split a variable vec_extract operation into the component instructions. */
7452 void
7453 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7454 rtx tmp_altivec)
7456 machine_mode mode = GET_MODE (src);
7457 machine_mode scalar_mode = GET_MODE (dest);
7458 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7459 int byte_shift = exact_log2 (scalar_size);
7461 gcc_assert (byte_shift >= 0);
7463 /* If we are given a memory address, optimize to load just the element. We
7464 don't have to adjust the vector element number on little endian
7465 systems. */
7466 if (MEM_P (src))
7468 gcc_assert (REG_P (tmp_gpr));
7469 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7470 tmp_gpr, scalar_mode));
7471 return;
7474 else if (REG_P (src) || SUBREG_P (src))
7476 int bit_shift = byte_shift + 3;
7477 rtx element2;
7479 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7481 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7482 an XOR, otherwise we need to subtract. The shift amount is chosen so
7483 that VSLO will shift the element into the upper position (adding 3
7484 converts a byte shift into a bit shift). */
7485 if (scalar_size == 8)
7487 if (!VECTOR_ELT_ORDER_BIG)
7489 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7490 element2 = tmp_gpr;
7492 else
7493 element2 = element;
7495 /* Generate RLDIC directly to shift left 6 bits and keep just the
7496 one bit that can be set. */
7497 emit_insn (gen_rtx_SET (tmp_gpr,
7498 gen_rtx_AND (DImode,
7499 gen_rtx_ASHIFT (DImode,
7500 element2,
7501 GEN_INT (6)),
7502 GEN_INT (64))));
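/* E.g., for V2DImode, selecting element 1 produces 1 << 6 == 64:
   eight bytes' worth of bits, which is exactly the shift VSLO needs
   to move the second doubleword into the upper position.  */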
7504 else
7506 if (!VECTOR_ELT_ORDER_BIG)
7508 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7510 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7511 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7512 element2 = tmp_gpr;
7514 else
7515 element2 = element;
7517 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7520 /* Get the value into the lower byte of the Altivec register where VSLO
7521 expects it. */
7522 if (TARGET_P9_VECTOR)
7523 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7524 else if (can_create_pseudo_p ())
7525 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7526 else
7528 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7529 emit_move_insn (tmp_di, tmp_gpr);
7530 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7533 /* Do the VSLO to get the value into the final location. */
7534 switch (mode)
7536 case V2DFmode:
7537 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7538 return;
7540 case V2DImode:
7541 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7542 return;
7544 case V4SFmode:
7546 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7547 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7548 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7549 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7550 tmp_altivec));
7552 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7553 return;
7556 case V4SImode:
7557 case V8HImode:
7558 case V16QImode:
7560 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7561 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7562 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7563 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7564 tmp_altivec));
7565 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7566 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7567 GEN_INT (64 - (8 * scalar_size))));
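/* The element now sits in the high bits of the doubleword, so for
   example a V8HImode extract (SCALAR_SIZE == 2) shifts right by
   64 - 16 == 48 to bring it down, sign-extended, into the low bits.  */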
7568 return;
7571 default:
7572 gcc_unreachable ();
7575 return;
7577 else
7578 gcc_unreachable ();
7581 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7582 two SImode values. */
7584 static void
7585 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7587 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7589 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7591 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7592 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7594 emit_move_insn (dest, GEN_INT (const1 | const2));
7595 return;
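/* E.g., SI1 == 0x12345678 and SI2 == 0x9abcdef0 combine into the
   single DImode constant 0x123456789abcdef0.  */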
7598 /* Put si1 into upper 32-bits of dest. */
7599 if (CONST_INT_P (si1))
7600 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7601 else
7603 /* Generate RLDIC. */
7604 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7605 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7606 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7607 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7608 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7609 emit_insn (gen_rtx_SET (dest, and_rtx));
7612 /* Put si2 into the temporary. */
7613 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7614 if (CONST_INT_P (si2))
7615 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7616 else
7617 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7619 /* Combine the two parts. */
7620 emit_insn (gen_iordi3 (dest, dest, tmp));
7621 return;
7624 /* Split a V4SI initialization. */
7626 void
7627 rs6000_split_v4si_init (rtx operands[])
7629 rtx dest = operands[0];
7631 /* Destination is a GPR, build up the two DImode parts in place. */
7632 if (REG_P (dest) || SUBREG_P (dest))
7634 int d_regno = regno_or_subregno (dest);
7635 rtx scalar1 = operands[1];
7636 rtx scalar2 = operands[2];
7637 rtx scalar3 = operands[3];
7638 rtx scalar4 = operands[4];
7639 rtx tmp1 = operands[5];
7640 rtx tmp2 = operands[6];
7642 /* Even though we only need one temporary (plus the destination, which
7643 has an early clobber constraint), try to use two temporaries, one for
7644 each double word created. That way the 2nd insn scheduling pass can
7645 rearrange things so the two parts are done in parallel. */
7646 if (BYTES_BIG_ENDIAN)
7648 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7649 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7650 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7651 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7653 else
7655 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7656 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7657 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7658 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7659 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7661 return;
7664 else
7665 gcc_unreachable ();
7668 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7670 bool
7671 invalid_e500_subreg (rtx op, machine_mode mode)
7673 if (TARGET_E500_DOUBLE)
7675 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7676 subreg:TI and reg:TF. Decimal float modes are like integer
7677 modes (only low part of each register used) for this
7678 purpose. */
7679 if (GET_CODE (op) == SUBREG
7680 && (mode == SImode || mode == DImode || mode == TImode
7681 || mode == DDmode || mode == TDmode || mode == PTImode)
7682 && REG_P (SUBREG_REG (op))
7683 && (GET_MODE (SUBREG_REG (op)) == DFmode
7684 || GET_MODE (SUBREG_REG (op)) == TFmode
7685 || GET_MODE (SUBREG_REG (op)) == IFmode
7686 || GET_MODE (SUBREG_REG (op)) == KFmode))
7687 return true;
7689 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7690 reg:TI. */
7691 if (GET_CODE (op) == SUBREG
7692 && (mode == DFmode || mode == TFmode || mode == IFmode
7693 || mode == KFmode)
7694 && REG_P (SUBREG_REG (op))
7695 && (GET_MODE (SUBREG_REG (op)) == DImode
7696 || GET_MODE (SUBREG_REG (op)) == TImode
7697 || GET_MODE (SUBREG_REG (op)) == PTImode
7698 || GET_MODE (SUBREG_REG (op)) == DDmode
7699 || GET_MODE (SUBREG_REG (op)) == TDmode))
7700 return true;
7703 if (TARGET_SPE
7704 && GET_CODE (op) == SUBREG
7705 && mode == SImode
7706 && REG_P (SUBREG_REG (op))
7707 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7708 return true;
7710 return false;
7713 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7714 selects whether the alignment is ABI-mandated, optional, or
7715 both ABI-mandated and optional. */
7717 unsigned int
7718 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7720 if (how != align_opt)
7722 if (TREE_CODE (type) == VECTOR_TYPE)
7724 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7725 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7727 if (align < 64)
7728 align = 64;
7730 else if (align < 128)
7731 align = 128;
7733 else if (TARGET_E500_DOUBLE
7734 && TREE_CODE (type) == REAL_TYPE
7735 && TYPE_MODE (type) == DFmode)
7737 if (align < 64)
7738 align = 64;
7742 if (how != align_abi)
7744 if (TREE_CODE (type) == ARRAY_TYPE
7745 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7747 if (align < BITS_PER_WORD)
7748 align = BITS_PER_WORD;
7752 return align;
7755 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7757 bool
7758 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7760 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7762 if (computed != 128)
7764 static bool warned;
7765 if (!warned && warn_psabi)
7767 warned = true;
7768 inform (input_location,
7769 "the layout of aggregates containing vectors with"
7770 " %d-byte alignment has changed in GCC 5",
7771 computed / BITS_PER_UNIT);
7774 /* In current GCC there is no special case. */
7775 return false;
7778 return false;
7781 /* AIX increases natural record alignment to doubleword if the first
7782 field is an FP double, while the FP fields remain word-aligned. */
7784 unsigned int
7785 rs6000_special_round_type_align (tree type, unsigned int computed,
7786 unsigned int specified)
7788 unsigned int align = MAX (computed, specified);
7789 tree field = TYPE_FIELDS (type);
7791 /* Skip all non-field decls. */
7792 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7793 field = DECL_CHAIN (field);
7795 if (field != NULL && field != type)
7797 type = TREE_TYPE (field);
7798 while (TREE_CODE (type) == ARRAY_TYPE)
7799 type = TREE_TYPE (type);
7801 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7802 align = MAX (align, 64);
7805 return align;
7808 /* Darwin increases record alignment to the natural alignment of
7809 the first field. */
7811 unsigned int
7812 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7813 unsigned int specified)
7815 unsigned int align = MAX (computed, specified);
7817 if (TYPE_PACKED (type))
7818 return align;
7820 /* Find the first field, looking down into aggregates. */
7821 do {
7822 tree field = TYPE_FIELDS (type);
7823 /* Skip all non-field decls. */
7824 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7825 field = DECL_CHAIN (field);
7826 if (! field)
7827 break;
7828 /* A packed field does not contribute any extra alignment. */
7829 if (DECL_PACKED (field))
7830 return align;
7831 type = TREE_TYPE (field);
7832 while (TREE_CODE (type) == ARRAY_TYPE)
7833 type = TREE_TYPE (type);
7834 } while (AGGREGATE_TYPE_P (type));
7836 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7837 align = MAX (align, TYPE_ALIGN (type));
7839 return align;
7842 /* Return 1 for an operand in small memory on V.4/eabi. */
7844 int
7845 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7846 machine_mode mode ATTRIBUTE_UNUSED)
7848 #if TARGET_ELF
7849 rtx sym_ref;
7851 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7852 return 0;
7854 if (DEFAULT_ABI != ABI_V4)
7855 return 0;
7857 /* Vector and float memory instructions have a limited offset on the
7858 SPE, so using a vector or float variable directly as an operand is
7859 not useful. */
7860 if (TARGET_SPE
7861 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7862 return 0;
7864 if (GET_CODE (op) == SYMBOL_REF)
7865 sym_ref = op;
7867 else if (GET_CODE (op) != CONST
7868 || GET_CODE (XEXP (op, 0)) != PLUS
7869 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7870 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7871 return 0;
7873 else
7875 rtx sum = XEXP (op, 0);
7876 HOST_WIDE_INT summand;
7878 /* We have to be careful here, because it is the referenced address
7879 that must be 32k from _SDA_BASE_, not just the symbol. */
7880 summand = INTVAL (XEXP (sum, 1));
7881 if (summand < 0 || summand > g_switch_value)
7882 return 0;
7884 sym_ref = XEXP (sum, 0);
7887 return SYMBOL_REF_SMALL_P (sym_ref);
7888 #else
7889 return 0;
7890 #endif
7893 /* Return true if either operand is a general purpose register. */
7895 bool
7896 gpr_or_gpr_p (rtx op0, rtx op1)
7898 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7899 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7902 /* Return true if this is a move direct operation between GPR registers and
7903 floating point/VSX registers. */
7905 bool
7906 direct_move_p (rtx op0, rtx op1)
7908 int regno0, regno1;
7910 if (!REG_P (op0) || !REG_P (op1))
7911 return false;
7913 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7914 return false;
7916 regno0 = REGNO (op0);
7917 regno1 = REGNO (op1);
7918 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7919 return false;
7921 if (INT_REGNO_P (regno0))
7922 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7924 else if (INT_REGNO_P (regno1))
7926 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7927 return true;
7929 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7930 return true;
7933 return false;
7936 /* Return true if the OFFSET is valid for the quad address instructions that
7937 use d-form (register + offset) addressing. */
7939 static inline bool
7940 quad_address_offset_p (HOST_WIDE_INT offset)
7942 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
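/* E.g., offsets 0, 16 and -32768 are acceptable; 20 is rejected (not
   a multiple of 16) and 32768 is rejected (outside the signed 16-bit
   displacement range).  */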
7945 /* Return true if the ADDR is an acceptable address for a quad memory
7946 operation of mode MODE (either LQ/STQ for general purpose registers, or
7947 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
7948 base register must pass the strict (during and after reload) register
7949 check. */
7951 bool
7952 quad_address_p (rtx addr, machine_mode mode, bool strict)
7954 rtx op0, op1;
7956 if (GET_MODE_SIZE (mode) != 16)
7957 return false;
7959 if (legitimate_indirect_address_p (addr, strict))
7960 return true;
7962 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7963 return false;
7965 if (GET_CODE (addr) != PLUS)
7966 return false;
7968 op0 = XEXP (addr, 0);
7969 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7970 return false;
7972 op1 = XEXP (addr, 1);
7973 if (!CONST_INT_P (op1))
7974 return false;
7976 return quad_address_offset_p (INTVAL (op1));
7979 /* Return true if this is a load or store quad operation. This function does
7980 not handle the atomic quad memory instructions. */
7982 bool
7983 quad_load_store_p (rtx op0, rtx op1)
7985 bool ret;
7987 if (!TARGET_QUAD_MEMORY)
7988 ret = false;
7990 else if (REG_P (op0) && MEM_P (op1))
7991 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7992 && quad_memory_operand (op1, GET_MODE (op1))
7993 && !reg_overlap_mentioned_p (op0, op1));
7995 else if (MEM_P (op0) && REG_P (op1))
7996 ret = (quad_memory_operand (op0, GET_MODE (op0))
7997 && quad_int_reg_operand (op1, GET_MODE (op1)));
7999 else
8000 ret = false;
8002 if (TARGET_DEBUG_ADDR)
8004 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8005 ret ? "true" : "false");
8006 debug_rtx (gen_rtx_SET (op0, op1));
8009 return ret;
8012 /* Given an address, return a constant offset term if one exists. */
8014 static rtx
8015 address_offset (rtx op)
8017 if (GET_CODE (op) == PRE_INC
8018 || GET_CODE (op) == PRE_DEC)
8019 op = XEXP (op, 0);
8020 else if (GET_CODE (op) == PRE_MODIFY
8021 || GET_CODE (op) == LO_SUM)
8022 op = XEXP (op, 1);
8024 if (GET_CODE (op) == CONST)
8025 op = XEXP (op, 0);
8027 if (GET_CODE (op) == PLUS)
8028 op = XEXP (op, 1);
8030 if (CONST_INT_P (op))
8031 return op;
8033 return NULL_RTX;
8036 /* Return true if the MEM operand is a memory operand suitable for use
8037 with a (full width, possibly multiple) gpr load/store. On
8038 powerpc64 this means the offset must be divisible by 4.
8039 Implements 'Y' constraint.
8041 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8042 a constraint function we know the operand has satisfied a suitable
8043 memory predicate. Also accept some odd rtl generated by reload
8044 (see rs6000_legitimize_reload_address for various forms). It is
8045 important that reload rtl be accepted by appropriate constraints
8046 but not by the operand predicate.
8048 Offsetting a lo_sum should not be allowed, except where we know by
8049 alignment that a 32k boundary is not crossed, but see the ???
8050 comment in rs6000_legitimize_reload_address. Note that by
8051 "offsetting" here we mean a further offset to access parts of the
8052 MEM. It's fine to have a lo_sum where the inner address is offset
8053 from a sym, since the same sym+offset will appear in the high part
8054 of the address calculation. */
8056 bool
8057 mem_operand_gpr (rtx op, machine_mode mode)
8059 unsigned HOST_WIDE_INT offset;
8060 int extra;
8061 rtx addr = XEXP (op, 0);
8063 op = address_offset (addr);
8064 if (op == NULL_RTX)
8065 return true;
8067 offset = INTVAL (op);
8068 if (TARGET_POWERPC64 && (offset & 3) != 0)
8069 return false;
8071 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8072 if (extra < 0)
8073 extra = 0;
8075 if (GET_CODE (addr) == LO_SUM)
8076 /* For lo_sum addresses, we must allow any offset except one that
8077 causes a wrap, so test only the low 16 bits. */
8078 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8080 return offset + 0x8000 < 0x10000u - extra;
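/* Worked example: a lo_sum offset of 0x1fffc has low 16 bits 0xfffc,
   which the xor/subtract idiom sign-extends to -4; the final unsigned
   compare then accepts any sign-extended offset in
   [-0x8000, 0x8000 - extra).  */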
8083 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8084 enforce an offset divisible by 4 even for 32-bit. */
8086 bool
8087 mem_operand_ds_form (rtx op, machine_mode mode)
8089 unsigned HOST_WIDE_INT offset;
8090 int extra;
8091 rtx addr = XEXP (op, 0);
8093 if (!offsettable_address_p (false, mode, addr))
8094 return false;
8096 op = address_offset (addr);
8097 if (op == NULL_RTX)
8098 return true;
8100 offset = INTVAL (op);
8101 if ((offset & 3) != 0)
8102 return false;
8104 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8105 if (extra < 0)
8106 extra = 0;
8108 if (GET_CODE (addr) == LO_SUM)
8109 /* For lo_sum addresses, we must allow any offset except one that
8110 causes a wrap, so test only the low 16 bits. */
8111 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8113 return offset + 0x8000 < 0x10000u - extra;
8116 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8118 static bool
8119 reg_offset_addressing_ok_p (machine_mode mode)
8121 switch (mode)
8123 case V16QImode:
8124 case V8HImode:
8125 case V4SFmode:
8126 case V4SImode:
8127 case V2DFmode:
8128 case V2DImode:
8129 case V1TImode:
8130 case TImode:
8131 case TFmode:
8132 case KFmode:
8133 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8134 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8135 a vector mode, if we want to use the VSX registers to move it around,
8136 we need to restrict ourselves to reg+reg addressing. Similarly for
8137 IEEE 128-bit floating point that is passed in a single vector
8138 register. */
8139 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8140 return mode_supports_vsx_dform_quad (mode);
8141 break;
8143 case V4HImode:
8144 case V2SImode:
8145 case V1DImode:
8146 case V2SFmode:
8147 /* Paired vector modes. Only reg+reg addressing is valid. */
8148 if (TARGET_PAIRED_FLOAT)
8149 return false;
8150 break;
8152 case SDmode:
8153 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8154 addressing for the LFIWZX and STFIWX instructions. */
8155 if (TARGET_NO_SDMODE_STACK)
8156 return false;
8157 break;
8159 default:
8160 break;
8163 return true;
8166 static bool
8167 virtual_stack_registers_memory_p (rtx op)
8169 int regnum;
8171 if (GET_CODE (op) == REG)
8172 regnum = REGNO (op);
8174 else if (GET_CODE (op) == PLUS
8175 && GET_CODE (XEXP (op, 0)) == REG
8176 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8177 regnum = REGNO (XEXP (op, 0));
8179 else
8180 return false;
8182 return (regnum >= FIRST_VIRTUAL_REGISTER
8183 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8186 /* Return true if a MODE-sized memory access to OP plus OFFSET
8187 is known not to straddle a 32k boundary. This function is used
8188 to determine whether -mcmodel=medium code can use TOC pointer
8189 relative addressing for OP. This means the alignment of the TOC
8190 pointer must also be taken into account, and unfortunately that is
8191 only 8 bytes. */
8193 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8194 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8195 #endif
8197 static bool
8198 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8199 machine_mode mode)
8201 tree decl;
8202 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8204 if (GET_CODE (op) != SYMBOL_REF)
8205 return false;
8207 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8208 SYMBOL_REF. */
8209 if (mode_supports_vsx_dform_quad (mode))
8210 return false;
8212 dsize = GET_MODE_SIZE (mode);
8213 decl = SYMBOL_REF_DECL (op);
8214 if (!decl)
8216 if (dsize == 0)
8217 return false;
8219 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8220 replacing memory addresses with an anchor plus offset. We
8221 could find the decl by rummaging around in the block->objects
8222 VEC for the given offset but that seems like too much work. */
8223 dalign = BITS_PER_UNIT;
8224 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8225 && SYMBOL_REF_ANCHOR_P (op)
8226 && SYMBOL_REF_BLOCK (op) != NULL)
8228 struct object_block *block = SYMBOL_REF_BLOCK (op);
8230 dalign = block->alignment;
8231 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8233 else if (CONSTANT_POOL_ADDRESS_P (op))
8235 /* It would be nice to have get_pool_align().. */
8236 machine_mode cmode = get_pool_mode (op);
8238 dalign = GET_MODE_ALIGNMENT (cmode);
8241 else if (DECL_P (decl))
8243 dalign = DECL_ALIGN (decl);
8245 if (dsize == 0)
8247 /* Allow BLKmode when the entire object is known to not
8248 cross a 32k boundary. */
8249 if (!DECL_SIZE_UNIT (decl))
8250 return false;
8252 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8253 return false;
8255 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8256 if (dsize > 32768)
8257 return false;
8259 dalign /= BITS_PER_UNIT;
8260 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8261 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8262 return dalign >= dsize;
8265 else
8266 gcc_unreachable ();
8268 /* Find how many bits of the alignment we know for this access. */
8269 dalign /= BITS_PER_UNIT;
8270 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8271 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8272 mask = dalign - 1;
8273 lsb = offset & -offset;
8274 mask &= lsb - 1;
8275 dalign = mask + 1;
8277 return dalign >= dsize;
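/* Worked example: with the TOC pointer alignment capped at 8, an
   anchor offset of 40 has lowest set bit 8, so 8 bytes of alignment
   are known and a dsize of up to 8 passes; an offset of 4 only
   guarantees 4-byte alignment, so only accesses of at most 4 bytes
   pass.  */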
8280 static bool
8281 constant_pool_expr_p (rtx op)
8283 rtx base, offset;
8285 split_const (op, &base, &offset);
8286 return (GET_CODE (base) == SYMBOL_REF
8287 && CONSTANT_POOL_ADDRESS_P (base)
8288 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8291 static const_rtx tocrel_base, tocrel_offset;
8293 /* Return true if OP is a toc pointer relative address (the output
8294 of create_TOC_reference). If STRICT, do not match non-split
8295 -mcmodel=large/medium toc pointer relative addresses. */
8297 bool
8298 toc_relative_expr_p (const_rtx op, bool strict)
8300 if (!TARGET_TOC)
8301 return false;
8303 if (TARGET_CMODEL != CMODEL_SMALL)
8305 /* When strict, ensure we have everything tidy. */
8306 if (strict
8307 && !(GET_CODE (op) == LO_SUM
8308 && REG_P (XEXP (op, 0))
8309 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8310 return false;
8312 /* When not strict, allow non-split TOC addresses and also allow
8313 (lo_sum (high ..)) TOC addresses created during reload. */
8314 if (GET_CODE (op) == LO_SUM)
8315 op = XEXP (op, 1);
8318 tocrel_base = op;
8319 tocrel_offset = const0_rtx;
8320 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8322 tocrel_base = XEXP (op, 0);
8323 tocrel_offset = XEXP (op, 1);
8326 return (GET_CODE (tocrel_base) == UNSPEC
8327 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8330 /* Return true if X is a constant pool address, and also for cmodel=medium
8331 if X is a toc-relative address known to be offsettable within MODE. */
8333 bool
8334 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8335 bool strict)
8337 return (toc_relative_expr_p (x, strict)
8338 && (TARGET_CMODEL != CMODEL_MEDIUM
8339 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8340 || mode == QImode
8341 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8342 INTVAL (tocrel_offset), mode)));
8345 static bool
8346 legitimate_small_data_p (machine_mode mode, rtx x)
8348 return (DEFAULT_ABI == ABI_V4
8349 && !flag_pic && !TARGET_TOC
8350 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8351 && small_data_operand (x, mode));
8354 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
8355 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
8357 bool
8358 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8359 bool strict, bool worst_case)
8361 unsigned HOST_WIDE_INT offset;
8362 unsigned int extra;
8364 if (GET_CODE (x) != PLUS)
8365 return false;
8366 if (!REG_P (XEXP (x, 0)))
8367 return false;
8368 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8369 return false;
8370 if (mode_supports_vsx_dform_quad (mode))
8371 return quad_address_p (x, mode, strict);
8372 if (!reg_offset_addressing_ok_p (mode))
8373 return virtual_stack_registers_memory_p (x);
8374 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8375 return true;
8376 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8377 return false;
8379 offset = INTVAL (XEXP (x, 1));
8380 extra = 0;
8381 switch (mode)
8383 case V4HImode:
8384 case V2SImode:
8385 case V1DImode:
8386 case V2SFmode:
8387 /* SPE vector modes. */
8388 return SPE_CONST_OFFSET_OK (offset);
8390 case DFmode:
8391 case DDmode:
8392 case DImode:
8393 /* On e500v2, we may have:
8395 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
8397 which gets addressed with evldd instructions. */
8398 if (TARGET_E500_DOUBLE)
8399 return SPE_CONST_OFFSET_OK (offset);
8401 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8402 addressing. */
8403 if (VECTOR_MEM_VSX_P (mode))
8404 return false;
8406 if (!worst_case)
8407 break;
8408 if (!TARGET_POWERPC64)
8409 extra = 4;
8410 else if (offset & 3)
8411 return false;
8412 break;
8414 case TFmode:
8415 case IFmode:
8416 case KFmode:
8417 if (TARGET_E500_DOUBLE)
8418 return (SPE_CONST_OFFSET_OK (offset)
8419 && SPE_CONST_OFFSET_OK (offset + 8));
8420 /* fall through */
8422 case TDmode:
8423 case TImode:
8424 case PTImode:
8425 extra = 8;
8426 if (!worst_case)
8427 break;
8428 if (!TARGET_POWERPC64)
8429 extra = 12;
8430 else if (offset & 3)
8431 return false;
8432 break;
8434 default:
8435 break;
8438 offset += 0x8000;
8439 return offset < 0x10000 - extra;
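/* The bias turns the signed range check into one unsigned compare:
   e.g., for TImode (EXTRA == 8) a worst-case access touches bytes
   OFFSET .. OFFSET+15 as two doubleword loads, so the starting offset
   must satisfy -0x8000 <= OFFSET < 0x8000 - 8 for the second load's
   displacement to remain encodable.  */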
8442 bool
8443 legitimate_indexed_address_p (rtx x, int strict)
8445 rtx op0, op1;
8447 if (GET_CODE (x) != PLUS)
8448 return false;
8450 op0 = XEXP (x, 0);
8451 op1 = XEXP (x, 1);
8453 /* Recognize the rtl generated by reload which we know will later be
8454 replaced with proper base and index regs. */
8455 if (!strict
8456 && reload_in_progress
8457 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8458 && REG_P (op1))
8459 return true;
8461 return (REG_P (op0) && REG_P (op1)
8462 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8463 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8464 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8465 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8468 bool
8469 avoiding_indexed_address_p (machine_mode mode)
8471 /* Avoid indexed addressing for modes that have non-indexed
8472 load/store instruction forms. */
8473 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8476 bool
8477 legitimate_indirect_address_p (rtx x, int strict)
8479 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8482 bool
8483 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8485 if (!TARGET_MACHO || !flag_pic
8486 || mode != SImode || GET_CODE (x) != MEM)
8487 return false;
8488 x = XEXP (x, 0);
8490 if (GET_CODE (x) != LO_SUM)
8491 return false;
8492 if (GET_CODE (XEXP (x, 0)) != REG)
8493 return false;
8494 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8495 return false;
8496 x = XEXP (x, 1);
8498 return CONSTANT_P (x);
8501 static bool
8502 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8504 if (GET_CODE (x) != LO_SUM)
8505 return false;
8506 if (GET_CODE (XEXP (x, 0)) != REG)
8507 return false;
8508 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8509 return false;
8510 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8511 if (mode_supports_vsx_dform_quad (mode))
8512 return false;
8513 /* Restrict addressing for DI because of our SUBREG hackery. */
8514 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8515 return false;
8516 x = XEXP (x, 1);
8518 if (TARGET_ELF || TARGET_MACHO)
8520 bool large_toc_ok;
8522 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8523 return false;
8524 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8525 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8526 recognizes some LO_SUM addresses as valid although this
8527 function says the opposite. In most cases LRA can generate
8528 correct code for address reloads through its own transformations,
8529 but it cannot manage some LO_SUM cases. So we need to add
8530 code analogous to that in rs6000_legitimize_reload_address for
8531 LO_SUM here, saying that some addresses are still valid. */
8532 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8533 && small_toc_ref (x, VOIDmode));
8534 if (TARGET_TOC && ! large_toc_ok)
8535 return false;
8536 if (GET_MODE_NUNITS (mode) != 1)
8537 return false;
8538 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8539 && !(/* ??? Assume floating point reg based on mode? */
8540 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8541 && (mode == DFmode || mode == DDmode)))
8542 return false;
8544 return CONSTANT_P (x) || large_toc_ok;
8547 return false;
8551 /* Try machine-dependent ways of modifying an illegitimate address
8552 to be legitimate. If we find one, return the new, valid address.
8553 This is used from only one place: `memory_address' in explow.c.
8555 OLDX is the address as it was before break_out_memory_refs was
8556 called. In some cases it is useful to look at this to decide what
8557 needs to be done.
8559 It is always safe for this function to do nothing. It exists to
8560 recognize opportunities to optimize the output.
8562 On RS/6000, first check for the sum of a register with a constant
8563 integer that is out of range. If so, generate code to add the
8564 constant with the low-order 16 bits masked to the register and force
8565 this result into another register (this can be done with `cau').
8566 Then generate an address of REG+(CONST&0xffff), allowing for the
8567 possibility of bit 16 being a one.
8569 Then check for the sum of a register and something not constant, try to
8570 load the other things into a register and return the sum. */
8572 static rtx
8573 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8574 machine_mode mode)
8576 unsigned int extra;
8578 if (!reg_offset_addressing_ok_p (mode)
8579 || mode_supports_vsx_dform_quad (mode))
8581 if (virtual_stack_registers_memory_p (x))
8582 return x;
8584 /* In theory we should not be seeing addresses of the form reg+0,
8585 but just in case it is generated, optimize it away. */
8586 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8587 return force_reg (Pmode, XEXP (x, 0));
8589 /* For TImode with load/store quad, restrict addresses to just a single
8590 pointer, so it works with both GPRs and VSX registers. */
8591 /* Make sure both operands are registers. */
8592 else if (GET_CODE (x) == PLUS
8593 && (mode != TImode || !TARGET_VSX_TIMODE))
8594 return gen_rtx_PLUS (Pmode,
8595 force_reg (Pmode, XEXP (x, 0)),
8596 force_reg (Pmode, XEXP (x, 1)));
8597 else
8598 return force_reg (Pmode, x);
8600 if (GET_CODE (x) == SYMBOL_REF)
8602 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8603 if (model != 0)
8604 return rs6000_legitimize_tls_address (x, model);
8607 extra = 0;
8608 switch (mode)
8610 case TFmode:
8611 case TDmode:
8612 case TImode:
8613 case PTImode:
8614 case IFmode:
8615 case KFmode:
8616 /* As in legitimate_offset_address_p we do not assume
8617 worst-case. The mode here is just a hint as to the registers
8618 used. A TImode is usually in gprs, but may actually be in
8619 fprs. Leave worst-case scenario for reload to handle via
8620 insn constraints. PTImode is only GPRs. */
8621 extra = 8;
8622 break;
8623 default:
8624 break;
8627 if (GET_CODE (x) == PLUS
8628 && GET_CODE (XEXP (x, 0)) == REG
8629 && GET_CODE (XEXP (x, 1)) == CONST_INT
8630 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8631 >= 0x10000 - extra)
8632 && !(SPE_VECTOR_MODE (mode)
8633 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8635 HOST_WIDE_INT high_int, low_int;
8636 rtx sum;
8637 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8638 if (low_int >= 0x8000 - extra)
8639 low_int = 0;
8640 high_int = INTVAL (XEXP (x, 1)) - low_int;
8641 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8642 GEN_INT (high_int)), 0);
8643 return plus_constant (Pmode, sum, low_int);
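/* Worked example: reg + 0x12345 splits into HIGH_INT == 0x10000 and
   LOW_INT == 0x2345, i.e. roughly "addis tmp,reg,1" followed by a
   memory access at tmp + 0x2345.  */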
8645 else if (GET_CODE (x) == PLUS
8646 && GET_CODE (XEXP (x, 0)) == REG
8647 && GET_CODE (XEXP (x, 1)) != CONST_INT
8648 && GET_MODE_NUNITS (mode) == 1
8649 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8650 || (/* ??? Assume floating point reg based on mode? */
8651 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8652 && (mode == DFmode || mode == DDmode)))
8653 && !avoiding_indexed_address_p (mode))
8655 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8656 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8658 else if (SPE_VECTOR_MODE (mode)
8659 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8661 if (mode == DImode)
8662 return x;
8663 /* We accept [reg + reg] and [reg + OFFSET]. */
8665 if (GET_CODE (x) == PLUS)
8667 rtx op1 = XEXP (x, 0);
8668 rtx op2 = XEXP (x, 1);
8669 rtx y;
8671 op1 = force_reg (Pmode, op1);
8673 if (GET_CODE (op2) != REG
8674 && (GET_CODE (op2) != CONST_INT
8675 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8676 || (GET_MODE_SIZE (mode) > 8
8677 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8678 op2 = force_reg (Pmode, op2);
8680 /* We can't always do [reg + reg] for these, because [reg +
8681 reg + offset] is not a legitimate addressing mode. */
8682 y = gen_rtx_PLUS (Pmode, op1, op2);
8684 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8685 return force_reg (Pmode, y);
8686 else
8687 return y;
8690 return force_reg (Pmode, x);
8692 else if ((TARGET_ELF
8693 #if TARGET_MACHO
8694 || !MACHO_DYNAMIC_NO_PIC_P
8695 #endif
8697 && TARGET_32BIT
8698 && TARGET_NO_TOC
8699 && ! flag_pic
8700 && GET_CODE (x) != CONST_INT
8701 && GET_CODE (x) != CONST_WIDE_INT
8702 && GET_CODE (x) != CONST_DOUBLE
8703 && CONSTANT_P (x)
8704 && GET_MODE_NUNITS (mode) == 1
8705 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8706 || (/* ??? Assume floating point reg based on mode? */
8707 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8708 && (mode == DFmode || mode == DDmode))))
8710 rtx reg = gen_reg_rtx (Pmode);
8711 if (TARGET_ELF)
8712 emit_insn (gen_elf_high (reg, x));
8713 else
8714 emit_insn (gen_macho_high (reg, x));
8715 return gen_rtx_LO_SUM (Pmode, reg, x);
8717 else if (TARGET_TOC
8718 && GET_CODE (x) == SYMBOL_REF
8719 && constant_pool_expr_p (x)
8720 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8721 return create_TOC_reference (x, NULL_RTX);
8722 else
8723 return x;
8726 /* Debug version of rs6000_legitimize_address. */
8727 static rtx
8728 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8730 rtx ret;
8731 rtx_insn *insns;
8733 start_sequence ();
8734 ret = rs6000_legitimize_address (x, oldx, mode);
8735 insns = get_insns ();
8736 end_sequence ();
8738 if (ret != x)
8740 fprintf (stderr,
8741 "\nrs6000_legitimize_address: mode %s, old code %s, "
8742 "new code %s, modified\n",
8743 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8744 GET_RTX_NAME (GET_CODE (ret)));
8746 fprintf (stderr, "Original address:\n");
8747 debug_rtx (x);
8749 fprintf (stderr, "oldx:\n");
8750 debug_rtx (oldx);
8752 fprintf (stderr, "New address:\n");
8753 debug_rtx (ret);
8755 if (insns)
8757 fprintf (stderr, "Insns added:\n");
8758 debug_rtx_list (insns, 20);
8761 else
8763 fprintf (stderr,
8764 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8765 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8767 debug_rtx (x);
8770 if (insns)
8771 emit_insn (insns);
8773 return ret;
8776 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8777 We need to emit DTP-relative relocations. */
8779 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8780 static void
8781 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8783 switch (size)
8785 case 4:
8786 fputs ("\t.long\t", file);
8787 break;
8788 case 8:
8789 fputs (DOUBLE_INT_ASM_OP, file);
8790 break;
8791 default:
8792 gcc_unreachable ();
8794 output_addr_const (file, x);
8795 if (TARGET_ELF)
8796 fputs ("@dtprel+0x8000", file);
8797 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8799 switch (SYMBOL_REF_TLS_MODEL (x))
8801 case 0:
8802 break;
8803 case TLS_MODEL_LOCAL_EXEC:
8804 fputs ("@le", file);
8805 break;
8806 case TLS_MODEL_INITIAL_EXEC:
8807 fputs ("@ie", file);
8808 break;
8809 case TLS_MODEL_GLOBAL_DYNAMIC:
8810 case TLS_MODEL_LOCAL_DYNAMIC:
8811 fputs ("@m", file);
8812 break;
8813 default:
8814 gcc_unreachable ();
8819 /* Return true if X is a symbol that refers to real (rather than emulated)
8820 TLS. */
8822 static bool
8823 rs6000_real_tls_symbol_ref_p (rtx x)
8825 return (GET_CODE (x) == SYMBOL_REF
8826 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8829 /* In the name of slightly smaller debug output, and to cater to
8830 general assembler lossage, recognize various UNSPEC sequences
8831 and turn them back into a direct symbol reference. */
8833 static rtx
8834 rs6000_delegitimize_address (rtx orig_x)
8836 rtx x, y, offset;
8838 orig_x = delegitimize_mem_from_attrs (orig_x);
8839 x = orig_x;
8840 if (MEM_P (x))
8841 x = XEXP (x, 0);
8843 y = x;
8844 if (TARGET_CMODEL != CMODEL_SMALL
8845 && GET_CODE (y) == LO_SUM)
8846 y = XEXP (y, 1);
8848 offset = NULL_RTX;
8849 if (GET_CODE (y) == PLUS
8850 && GET_MODE (y) == Pmode
8851 && CONST_INT_P (XEXP (y, 1)))
8853 offset = XEXP (y, 1);
8854 y = XEXP (y, 0);
8857 if (GET_CODE (y) == UNSPEC
8858 && XINT (y, 1) == UNSPEC_TOCREL)
8860 y = XVECEXP (y, 0, 0);
8862 #ifdef HAVE_AS_TLS
8863 /* Do not associate thread-local symbols with the original
8864 constant pool symbol. */
8865 if (TARGET_XCOFF
8866 && GET_CODE (y) == SYMBOL_REF
8867 && CONSTANT_POOL_ADDRESS_P (y)
8868 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8869 return orig_x;
8870 #endif
8872 if (offset != NULL_RTX)
8873 y = gen_rtx_PLUS (Pmode, y, offset);
8874 if (!MEM_P (orig_x))
8875 return y;
8876 else
8877 return replace_equiv_address_nv (orig_x, y);
8880 if (TARGET_MACHO
8881 && GET_CODE (orig_x) == LO_SUM
8882 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8884 y = XEXP (XEXP (orig_x, 1), 0);
8885 if (GET_CODE (y) == UNSPEC
8886 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8887 return XVECEXP (y, 0, 0);
8890 return orig_x;
8893 /* Return true if X shouldn't be emitted into the debug info.
8894 The linker doesn't like .toc section references from
8895 .debug_* sections, so reject .toc section symbols. */
8897 static bool
8898 rs6000_const_not_ok_for_debug_p (rtx x)
8900 if (GET_CODE (x) == SYMBOL_REF
8901 && CONSTANT_POOL_ADDRESS_P (x))
8903 rtx c = get_pool_constant (x);
8904 machine_mode cmode = get_pool_mode (x);
8905 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8906 return true;
8909 return false;
8912 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8914 static GTY(()) rtx rs6000_tls_symbol;
8915 static rtx
8916 rs6000_tls_get_addr (void)
8918 if (!rs6000_tls_symbol)
8919 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8921 return rs6000_tls_symbol;
8924 /* Construct the SYMBOL_REF for TLS GOT references. */
8926 static GTY(()) rtx rs6000_got_symbol;
8927 static rtx
8928 rs6000_got_sym (void)
8930 if (!rs6000_got_symbol)
8932 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8933 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8934 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8937 return rs6000_got_symbol;
8940 /* AIX Thread-Local Address support. */
8942 static rtx
8943 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8945 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8946 const char *name;
8947 char *tlsname;
8949 name = XSTR (addr, 0);
8950 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8951 or will be in the TLS private data section. */
8952 if (name[strlen (name) - 1] != ']'
8953 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8954 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8956 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8957 strcpy (tlsname, name);
8958 strcat (tlsname,
8959 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8960 tlsaddr = copy_rtx (addr);
8961 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8963 else
8964 tlsaddr = addr;
8966 /* Place addr into TOC constant pool. */
8967 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8969 /* Output the TOC entry and create the MEM referencing the value. */
8970 if (constant_pool_expr_p (XEXP (sym, 0))
8971 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8973 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8974 mem = gen_const_mem (Pmode, tocref);
8975 set_mem_alias_set (mem, get_TOC_alias_set ());
8977 else
8978 return sym;
8980 /* Use global-dynamic for local-dynamic. */
8981 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8982 || model == TLS_MODEL_LOCAL_DYNAMIC)
8984 /* Create new TOC reference for @m symbol. */
8985 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8986 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8987 strcpy (tlsname, "*LCM");
8988 strcat (tlsname, name + 3);
8989 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8990 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8991 tocref = create_TOC_reference (modaddr, NULL_RTX);
8992 rtx modmem = gen_const_mem (Pmode, tocref);
8993 set_mem_alias_set (modmem, get_TOC_alias_set ());
8995 rtx modreg = gen_reg_rtx (Pmode);
8996 emit_insn (gen_rtx_SET (modreg, modmem));
8998 tmpreg = gen_reg_rtx (Pmode);
8999 emit_insn (gen_rtx_SET (tmpreg, mem));
9001 dest = gen_reg_rtx (Pmode);
9002 if (TARGET_32BIT)
9003 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9004 else
9005 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9006 return dest;
9008 /* Obtain the TLS pointer: a 32-bit call, or GPR 13 on 64-bit. */
9009 else if (TARGET_32BIT)
9011 tlsreg = gen_reg_rtx (SImode);
9012 emit_insn (gen_tls_get_tpointer (tlsreg));
9014 else
9015 tlsreg = gen_rtx_REG (DImode, 13);
9017 /* Load the TOC value into temporary register. */
9018 tmpreg = gen_reg_rtx (Pmode);
9019 emit_insn (gen_rtx_SET (tmpreg, mem));
9020 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9021 gen_rtx_MINUS (Pmode, addr, tlsreg));
9023 /* Add TOC symbol value to TLS pointer. */
9024 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9026 return dest;
9029 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9030 this (thread-local) address. */
9032 static rtx
9033 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9035 rtx dest, insn;
9037 if (TARGET_XCOFF)
9038 return rs6000_legitimize_tls_address_aix (addr, model);
9040 dest = gen_reg_rtx (Pmode);
9041 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9043 rtx tlsreg;
9045 if (TARGET_64BIT)
9047 tlsreg = gen_rtx_REG (Pmode, 13);
9048 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9050 else
9052 tlsreg = gen_rtx_REG (Pmode, 2);
9053 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9055 emit_insn (insn);
9057 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9059 rtx tlsreg, tmp;
9061 tmp = gen_reg_rtx (Pmode);
9062 if (TARGET_64BIT)
9064 tlsreg = gen_rtx_REG (Pmode, 13);
9065 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9067 else
9069 tlsreg = gen_rtx_REG (Pmode, 2);
9070 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9072 emit_insn (insn);
9073 if (TARGET_64BIT)
9074 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9075 else
9076 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9077 emit_insn (insn);
9079 else
9081 rtx r3, got, tga, tmp1, tmp2, call_insn;
9083 /* We currently use relocations like @got@tlsgd for tls, which
9084 means the linker will handle allocation of tls entries, placing
9085 them in the .got section. So use a pointer to the .got section,
9086 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9087 or to secondary GOT sections used by 32-bit -fPIC. */
9088 if (TARGET_64BIT)
9089 got = gen_rtx_REG (Pmode, 2);
9090 else
9092 if (flag_pic == 1)
9093 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9094 else
9096 rtx gsym = rs6000_got_sym ();
9097 got = gen_reg_rtx (Pmode);
9098 if (flag_pic == 0)
9099 rs6000_emit_move (got, gsym, Pmode);
9100 else
9102 rtx mem, lab, last;
9104 tmp1 = gen_reg_rtx (Pmode);
9105 tmp2 = gen_reg_rtx (Pmode);
9106 mem = gen_const_mem (Pmode, tmp1);
9107 lab = gen_label_rtx ();
9108 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9109 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9110 if (TARGET_LINK_STACK)
9111 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9112 emit_move_insn (tmp2, mem);
9113 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9114 set_unique_reg_note (last, REG_EQUAL, gsym);
9119 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9121 tga = rs6000_tls_get_addr ();
9122 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9123 1, const0_rtx, Pmode);
9125 r3 = gen_rtx_REG (Pmode, 3);
9126 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9128 if (TARGET_64BIT)
9129 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9130 else
9131 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9133 else if (DEFAULT_ABI == ABI_V4)
9134 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9135 else
9136 gcc_unreachable ();
9137 call_insn = last_call_insn ();
9138 PATTERN (call_insn) = insn;
9139 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9140 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9141 pic_offset_table_rtx);
9143 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9145 tga = rs6000_tls_get_addr ();
9146 tmp1 = gen_reg_rtx (Pmode);
9147 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9148 1, const0_rtx, Pmode);
9150 r3 = gen_rtx_REG (Pmode, 3);
9151 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9153 if (TARGET_64BIT)
9154 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9155 else
9156 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9158 else if (DEFAULT_ABI == ABI_V4)
9159 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9160 else
9161 gcc_unreachable ();
9162 call_insn = last_call_insn ();
9163 PATTERN (call_insn) = insn;
9164 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9165 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9166 pic_offset_table_rtx);
9168 if (rs6000_tls_size == 16)
9170 if (TARGET_64BIT)
9171 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9172 else
9173 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9175 else if (rs6000_tls_size == 32)
9177 tmp2 = gen_reg_rtx (Pmode);
9178 if (TARGET_64BIT)
9179 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9180 else
9181 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9182 emit_insn (insn);
9183 if (TARGET_64BIT)
9184 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9185 else
9186 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9188 else
9190 tmp2 = gen_reg_rtx (Pmode);
9191 if (TARGET_64BIT)
9192 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9193 else
9194 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9195 emit_insn (insn);
9196 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9198 emit_insn (insn);
9200 else
9202 /* IE, or 64-bit offset LE. */
9203 tmp2 = gen_reg_rtx (Pmode);
9204 if (TARGET_64BIT)
9205 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9206 else
9207 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9208 emit_insn (insn);
9209 if (TARGET_64BIT)
9210 insn = gen_tls_tls_64 (dest, tmp2, addr);
9211 else
9212 insn = gen_tls_tls_32 (dest, tmp2, addr);
9213 emit_insn (insn);
9217 return dest;
9220 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9222 static bool
9223 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9225 if (GET_CODE (x) == HIGH
9226 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9227 return true;
9229 /* A TLS symbol in the TOC cannot contain a sum. */
9230 if (GET_CODE (x) == CONST
9231 && GET_CODE (XEXP (x, 0)) == PLUS
9232 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9233 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9234 return true;
9236 /* Do not place an ELF TLS symbol in the constant pool. */
9237 return TARGET_ELF && tls_referenced_p (x);
9240 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9241 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9242 can be addressed relative to the toc pointer. */
9244 static bool
9245 use_toc_relative_ref (rtx sym, machine_mode mode)
9247 return ((constant_pool_expr_p (sym)
9248 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9249 get_pool_mode (sym)))
9250 || (TARGET_CMODEL == CMODEL_MEDIUM
9251 && SYMBOL_REF_LOCAL_P (sym)
9252 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9255 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9256 replace the input X, or the original X if no replacement is called for.
9257 The output parameter *WIN is 1 if the calling macro should goto WIN,
9258 0 if it should not.
9260 For RS/6000, we wish to handle large displacements off a base
9261 register by splitting the addend across an addis and the mem insn.
9262 This cuts the number of extra insns needed from 3 to 1.
9264 On Darwin, we use this to generate code for floating point constants.
9265 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9266 The Darwin code is inside #if TARGET_MACHO because only then are the
9267 machopic_* functions defined. */
9268 static rtx
9269 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9270 int opnum, int type,
9271 int ind_levels ATTRIBUTE_UNUSED, int *win)
9273 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9274 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9276 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9277 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9278 if (reg_offset_p
9279 && opnum == 1
9280 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9281 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9282 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9283 && TARGET_P9_VECTOR)
9284 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9285 && TARGET_P9_VECTOR)))
9286 reg_offset_p = false;
9288 /* We must recognize output that we have already generated ourselves. */
9289 if (GET_CODE (x) == PLUS
9290 && GET_CODE (XEXP (x, 0)) == PLUS
9291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9292 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9293 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9295 if (TARGET_DEBUG_ADDR)
9297 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9298 debug_rtx (x);
9300 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9301 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9302 opnum, (enum reload_type) type);
9303 *win = 1;
9304 return x;
9307 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9308 if (GET_CODE (x) == LO_SUM
9309 && GET_CODE (XEXP (x, 0)) == HIGH)
9311 if (TARGET_DEBUG_ADDR)
9313 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9314 debug_rtx (x);
9316 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9317 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9318 opnum, (enum reload_type) type);
9319 *win = 1;
9320 return x;
9323 #if TARGET_MACHO
9324 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9325 && GET_CODE (x) == LO_SUM
9326 && GET_CODE (XEXP (x, 0)) == PLUS
9327 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9328 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9329 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9330 && machopic_operand_p (XEXP (x, 1)))
9332 /* Result of previous invocation of this function on Darwin
9333 floating point constant. */
9334 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9335 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9336 opnum, (enum reload_type) type);
9337 *win = 1;
9338 return x;
9340 #endif
9342 if (TARGET_CMODEL != CMODEL_SMALL
9343 && reg_offset_p
9344 && !quad_offset_p
9345 && small_toc_ref (x, VOIDmode))
9347 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9348 x = gen_rtx_LO_SUM (Pmode, hi, x);
9349 if (TARGET_DEBUG_ADDR)
9351 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9352 debug_rtx (x);
9354 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9355 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9356 opnum, (enum reload_type) type);
9357 *win = 1;
9358 return x;
9361 if (GET_CODE (x) == PLUS
9362 && REG_P (XEXP (x, 0))
9363 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9364 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9365 && CONST_INT_P (XEXP (x, 1))
9366 && reg_offset_p
9367 && !SPE_VECTOR_MODE (mode)
9368 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9369 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9371 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9372 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9373 HOST_WIDE_INT high
9374 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
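 /* A worked instance of the split above: for val = 0x12345,
 low = ((0x2345 ^ 0x8000) - 0x8000) = 0x2345 and high = 0x10000;
 for val = 0x9000 the sign extension makes low = -0x7000 and
 high = 0x10000. In both cases high + low == val, so an addis of
 (high >> 16) into the base register plus a 16-bit displacement of
 low in the mem insn reconstructs the original address. */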
9376 /* Check for 32-bit overflow or quad addresses with one of the
9377 four least significant bits set. */
9378 if (high + low != val
9379 || (quad_offset_p && (low & 0xf)))
9381 *win = 0;
9382 return x;
9385 /* Reload the high part into a base reg; leave the low part
9386 in the mem directly. */
9388 x = gen_rtx_PLUS (GET_MODE (x),
9389 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9390 GEN_INT (high)),
9391 GEN_INT (low));
9393 if (TARGET_DEBUG_ADDR)
9395 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9396 debug_rtx (x);
9398 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9399 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9400 opnum, (enum reload_type) type);
9401 *win = 1;
9402 return x;
9405 if (GET_CODE (x) == SYMBOL_REF
9406 && reg_offset_p
9407 && !quad_offset_p
9408 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9409 && !SPE_VECTOR_MODE (mode)
9410 #if TARGET_MACHO
9411 && DEFAULT_ABI == ABI_DARWIN
9412 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9413 && machopic_symbol_defined_p (x)
9414 #else
9415 && DEFAULT_ABI == ABI_V4
9416 && !flag_pic
9417 #endif
9418 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9419 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9420 without fprs.
9421 ??? Assume floating point reg based on mode? This assumption is
9422 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9423 where reload ends up doing a DFmode load of a constant from
9424 mem using two gprs. Unfortunately, at this point reload
9425 hasn't yet selected regs so poking around in reload data
9426 won't help and even if we could figure out the regs reliably,
9427 we'd still want to allow this transformation when the mem is
9428 naturally aligned. Since we say the address is good here, we
9429 can't disable offsets from LO_SUMs in mem_operand_gpr.
9430 FIXME: Allow offset from lo_sum for other modes too, when
9431 mem is sufficiently aligned.
9433 Also disallow this if the type can go in VMX/Altivec registers, since
9434 those registers do not have d-form (reg+offset) address modes. */
9435 && !reg_addr[mode].scalar_in_vmx_p
9436 && mode != TFmode
9437 && mode != TDmode
9438 && mode != IFmode
9439 && mode != KFmode
9440 && (mode != TImode || !TARGET_VSX_TIMODE)
9441 && mode != PTImode
9442 && (mode != DImode || TARGET_POWERPC64)
9443 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9444 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9446 #if TARGET_MACHO
9447 if (flag_pic)
9449 rtx offset = machopic_gen_offset (x);
9450 x = gen_rtx_LO_SUM (GET_MODE (x),
9451 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9452 gen_rtx_HIGH (Pmode, offset)), offset);
9454 else
9455 #endif
9456 x = gen_rtx_LO_SUM (GET_MODE (x),
9457 gen_rtx_HIGH (Pmode, x), x);
9459 if (TARGET_DEBUG_ADDR)
9461 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9462 debug_rtx (x);
9464 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9465 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9466 opnum, (enum reload_type) type);
9467 *win = 1;
9468 return x;
9471 /* Reload an offset address wrapped by an AND that represents the
9472 masking of the lower bits. Strip the outer AND and let reload
9473 convert the offset address into an indirect address. For VSX,
9474 force reload to create the address with an AND in a separate
9475 register, because we can't guarantee an altivec register will
9476 be used. */
9477 if (VECTOR_MEM_ALTIVEC_P (mode)
9478 && GET_CODE (x) == AND
9479 && GET_CODE (XEXP (x, 0)) == PLUS
9480 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9481 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9482 && GET_CODE (XEXP (x, 1)) == CONST_INT
9483 && INTVAL (XEXP (x, 1)) == -16)
9485 x = XEXP (x, 0);
9486 *win = 1;
9487 return x;
9490 if (TARGET_TOC
9491 && reg_offset_p
9492 && !quad_offset_p
9493 && GET_CODE (x) == SYMBOL_REF
9494 && use_toc_relative_ref (x, mode))
9496 x = create_TOC_reference (x, NULL_RTX);
9497 if (TARGET_CMODEL != CMODEL_SMALL)
9499 if (TARGET_DEBUG_ADDR)
9501 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9502 debug_rtx (x);
9504 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9505 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9506 opnum, (enum reload_type) type);
9508 *win = 1;
9509 return x;
9511 *win = 0;
9512 return x;
9515 /* Debug version of rs6000_legitimize_reload_address. */
9516 static rtx
9517 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9518 int opnum, int type,
9519 int ind_levels, int *win)
9521 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9522 ind_levels, win);
9523 fprintf (stderr,
9524 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9525 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9526 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9527 debug_rtx (x);
9529 if (x == ret)
9530 fprintf (stderr, "Same address returned\n");
9531 else if (!ret)
9532 fprintf (stderr, "NULL returned\n");
9533 else
9535 fprintf (stderr, "New address:\n");
9536 debug_rtx (ret);
9539 return ret;
9542 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9543 that is a valid memory address for an instruction.
9544 The MODE argument is the machine mode for the MEM expression
9545 that wants to use this address.
9547 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9548 refers to a constant pool entry of an address (or the sum of it
9549 plus a constant), a short (16-bit signed) constant plus a register,
9550 the sum of two registers, or a register indirect, possibly with an
9551 auto-increment. For DFmode, DDmode and DImode with a constant plus
9552 register, we must ensure that both words are addressable, or on
9553 PowerPC64 that the offset is word aligned.
9555 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9556 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9557 because adjacent memory cells are accessed by adding word-sized offsets
9558 during assembly output. */
9559 static bool
9560 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9562 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9563 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9565 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9566 if (VECTOR_MEM_ALTIVEC_P (mode)
9567 && GET_CODE (x) == AND
9568 && GET_CODE (XEXP (x, 1)) == CONST_INT
9569 && INTVAL (XEXP (x, 1)) == -16)
9570 x = XEXP (x, 0);
9572 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9573 return 0;
9574 if (legitimate_indirect_address_p (x, reg_ok_strict))
9575 return 1;
9576 if (TARGET_UPDATE
9577 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9578 && mode_supports_pre_incdec_p (mode)
9579 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9580 return 1;
9581 /* Handle restricted vector d-form offsets in ISA 3.0. */
9582 if (quad_offset_p)
9584 if (quad_address_p (x, mode, reg_ok_strict))
9585 return 1;
9587 else if (virtual_stack_registers_memory_p (x))
9588 return 1;
9590 else if (reg_offset_p)
9592 if (legitimate_small_data_p (mode, x))
9593 return 1;
9594 if (legitimate_constant_pool_address_p (x, mode,
9595 reg_ok_strict || lra_in_progress))
9596 return 1;
9597 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9598 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9599 return 1;
9602 /* For TImode, if we have TImode in VSX registers, only allow register
9603 indirect addresses. This will allow the values to go in either GPRs
9604 or VSX registers without reloading. The vector types would tend to
9605 go into VSX registers, so we allow REG+REG, while TImode seems
9606 somewhat split, in that some uses are GPR based, and some VSX based. */
9607 /* FIXME: We could loosen this by changing the following to
9608 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9609 but currently we cannot allow REG+REG addressing for TImode. See
9610 PR72827 for complete details on how this ends up hoodwinking DSE. */
9611 if (mode == TImode && TARGET_VSX_TIMODE)
9612 return 0;
9613 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9614 if (! reg_ok_strict
9615 && reg_offset_p
9616 && GET_CODE (x) == PLUS
9617 && GET_CODE (XEXP (x, 0)) == REG
9618 && (XEXP (x, 0) == virtual_stack_vars_rtx
9619 || XEXP (x, 0) == arg_pointer_rtx)
9620 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9621 return 1;
9622 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9623 return 1;
9624 if (!FLOAT128_2REG_P (mode)
9625 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9626 || TARGET_POWERPC64
9627 || (mode != DFmode && mode != DDmode)
9628 || (TARGET_E500_DOUBLE && mode != DDmode))
9629 && (TARGET_POWERPC64 || mode != DImode)
9630 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9631 && mode != PTImode
9632 && !avoiding_indexed_address_p (mode)
9633 && legitimate_indexed_address_p (x, reg_ok_strict))
9634 return 1;
9635 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9636 && mode_supports_pre_modify_p (mode)
9637 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9638 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9639 reg_ok_strict, false)
9640 || (!avoiding_indexed_address_p (mode)
9641 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9642 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9643 return 1;
9644 if (reg_offset_p && !quad_offset_p
9645 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9646 return 1;
9647 return 0;
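 /* In RTL terms the shapes accepted above include, for example:
 (reg R)                              register indirect
 (plus (reg R) (const_int 12))        D-form reg + 16-bit offset
 (plus (reg R) (reg S))               X-form indexed
 (lo_sum (reg R) (symbol_ref S))      high/lo_sum pair
 (pre_inc (reg R))                    update form, when enabled  */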
9650 /* Debug version of rs6000_legitimate_address_p. */
9651 static bool
9652 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9653 bool reg_ok_strict)
9655 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9656 fprintf (stderr,
9657 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9658 "strict = %d, reload = %s, code = %s\n",
9659 ret ? "true" : "false",
9660 GET_MODE_NAME (mode),
9661 reg_ok_strict,
9662 (reload_completed
9663 ? "after"
9664 : (reload_in_progress ? "progress" : "before")),
9665 GET_RTX_NAME (GET_CODE (x)));
9666 debug_rtx (x);
9668 return ret;
9671 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9673 static bool
9674 rs6000_mode_dependent_address_p (const_rtx addr,
9675 addr_space_t as ATTRIBUTE_UNUSED)
9677 return rs6000_mode_dependent_address_ptr (addr);
9680 /* Go to LABEL if ADDR (a legitimate address expression)
9681 has an effect that depends on the machine mode it is used for.
9683 On the RS/6000 this is true of all integral offsets (since AltiVec
9684 and VSX modes don't allow them) or is a pre-increment or decrement.
9686 ??? Except that due to conceptual problems in offsettable_address_p
9687 we can't really report the problems of integral offsets. So leave
9688 this assuming that the adjustable offset must be valid for the
9689 sub-words of a TFmode operand, which is what we had before. */
9691 static bool
9692 rs6000_mode_dependent_address (const_rtx addr)
9694 switch (GET_CODE (addr))
9696 case PLUS:
9697 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9698 is considered a legitimate address before reload, so there
9699 are no offset restrictions in that case. Note that this
9700 condition is safe in strict mode because any address involving
9701 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9702 been rejected as illegitimate. */
9703 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9704 && XEXP (addr, 0) != arg_pointer_rtx
9705 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9707 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9708 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
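 /* Unsigned wraparound makes the single comparison above cover both
 bounds: with TARGET_POWERPC64 the offset is mode independent only
 for -0x8000 <= val <= 0x7ff7, i.e. the signed 16-bit range less
 8 bytes of headroom so the last doubleword of a TFmode operand
 (val + 8) is still addressable; in 32-bit mode the last word sits
 at val + 12, hence 12 bytes of headroom. */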
9710 break;
9712 case LO_SUM:
9713 /* Anything in the constant pool is sufficiently aligned that
9714 all bytes have the same high part address. */
9715 return !legitimate_constant_pool_address_p (addr, QImode, false);
9717 /* Auto-increment cases are now treated generically in recog.c. */
9718 case PRE_MODIFY:
9719 return TARGET_UPDATE;
9721 /* AND is only allowed in Altivec loads. */
9722 case AND:
9723 return true;
9725 default:
9726 break;
9729 return false;
9732 /* Debug version of rs6000_mode_dependent_address. */
9733 static bool
9734 rs6000_debug_mode_dependent_address (const_rtx addr)
9736 bool ret = rs6000_mode_dependent_address (addr);
9738 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9739 ret ? "true" : "false");
9740 debug_rtx (addr);
9742 return ret;
9745 /* Implement FIND_BASE_TERM. */
9747 rtx
9748 rs6000_find_base_term (rtx op)
9750 rtx base;
9752 base = op;
9753 if (GET_CODE (base) == CONST)
9754 base = XEXP (base, 0);
9755 if (GET_CODE (base) == PLUS)
9756 base = XEXP (base, 0);
9757 if (GET_CODE (base) == UNSPEC)
9758 switch (XINT (base, 1))
9760 case UNSPEC_TOCREL:
9761 case UNSPEC_MACHOPIC_OFFSET:
9762 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9763 for aliasing purposes. */
9764 return XVECEXP (base, 0, 0);
9767 return op;
9770 /* More elaborate version of recog's offsettable_memref_p predicate
9771 that works around the ??? note of rs6000_mode_dependent_address.
9772 In particular it accepts
9774 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9776 in 32-bit mode, which the recog predicate rejects. */
9778 static bool
9779 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9781 bool worst_case;
9783 if (!MEM_P (op))
9784 return false;
9786 /* First mimic offsettable_memref_p. */
9787 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9788 return true;
9790 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9791 the latter predicate knows nothing about the mode of the memory
9792 reference and, therefore, assumes that it is the largest supported
9793 mode (TFmode). As a consequence, legitimate offsettable memory
9794 references are rejected. rs6000_legitimate_offset_address_p contains
9795 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9796 at least with a little bit of help here given that we know the
9797 actual registers used. */
9798 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9799 || GET_MODE_SIZE (reg_mode) == 4);
9800 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9801 true, worst_case);
9804 /* Determine the reassociation width to be used in reassociate_bb.
9805 This takes into account how many parallel operations we
9806 can actually do of a given type, and also the latency.
9808 int add/sub 6/cycle
9809 mul 2/cycle
9810 vect add/sub/mul 2/cycle
9811 fp add/sub/mul 2/cycle
9812 dfp 1/cycle
9815 static int
9816 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9817 enum machine_mode mode)
9819 switch (rs6000_cpu)
9821 case PROCESSOR_POWER8:
9822 case PROCESSOR_POWER9:
9823 if (DECIMAL_FLOAT_MODE_P (mode))
9824 return 1;
9825 if (VECTOR_MODE_P (mode))
9826 return 4;
9827 if (INTEGRAL_MODE_P (mode))
9828 return opc == MULT_EXPR ? 4 : 6;
9829 if (FLOAT_MODE_P (mode))
9830 return 4;
9831 break;
9832 default:
9833 break;
9835 return 1;
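 /* The width returned here (e.g. 6 for scalar integer add/sub on
 POWER8/9) tells reassociate_bb how bushy a tree to build: a serial
 chain such as (((a + b) + c) + d) is rebalanced toward
 ((a + b) + (c + d)) so that several adds can issue per cycle,
 while DFP, with a width of 1, stays serial. */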
9838 /* Change register usage conditional on target flags. */
9839 static void
9840 rs6000_conditional_register_usage (void)
9842 int i;
9844 if (TARGET_DEBUG_TARGET)
9845 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9847 /* Set MQ register fixed (already call_used) so that it will not be
9848 allocated. */
9849 fixed_regs[64] = 1;
9851 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9852 if (TARGET_64BIT)
9853 fixed_regs[13] = call_used_regs[13]
9854 = call_really_used_regs[13] = 1;
9856 /* Conditionally disable FPRs. */
9857 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9858 for (i = 32; i < 64; i++)
9859 fixed_regs[i] = call_used_regs[i]
9860 = call_really_used_regs[i] = 1;
9862 /* The TOC register is not killed across calls in a way that is
9863 visible to the compiler. */
9864 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9865 call_really_used_regs[2] = 0;
9867 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9868 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9870 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9871 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9872 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9873 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9875 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9876 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9877 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9878 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9880 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9881 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9882 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9884 if (TARGET_SPE)
9886 global_regs[SPEFSCR_REGNO] = 1;
9887 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9888 registers in prologues and epilogues. We no longer use r14
9889 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9890 pool for link-compatibility with older versions of GCC. Once
9891 "old" code has died out, we can return r14 to the allocation
9892 pool. */
9893 fixed_regs[14]
9894 = call_used_regs[14]
9895 = call_really_used_regs[14] = 1;
9898 if (!TARGET_ALTIVEC && !TARGET_VSX)
9900 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9901 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9902 call_really_used_regs[VRSAVE_REGNO] = 1;
9905 if (TARGET_ALTIVEC || TARGET_VSX)
9906 global_regs[VSCR_REGNO] = 1;
9908 if (TARGET_ALTIVEC_ABI)
9910 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9911 call_used_regs[i] = call_really_used_regs[i] = 1;
9913 /* AIX reserves VR20:31 in non-extended ABI mode. */
9914 if (TARGET_XCOFF)
9915 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9916 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9921 /* Output insns to set DEST equal to the constant SOURCE as a series of
9922 lis, ori and shl instructions and return TRUE. */
9924 bool
9925 rs6000_emit_set_const (rtx dest, rtx source)
9927 machine_mode mode = GET_MODE (dest);
9928 rtx temp, set;
9929 rtx_insn *insn;
9930 HOST_WIDE_INT c;
9932 gcc_checking_assert (CONST_INT_P (source));
9933 c = INTVAL (source);
9934 switch (mode)
9936 case QImode:
9937 case HImode:
9938 emit_insn (gen_rtx_SET (dest, source));
9939 return true;
9941 case SImode:
9942 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9944 emit_insn (gen_rtx_SET (copy_rtx (temp),
9945 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9946 emit_insn (gen_rtx_SET (dest,
9947 gen_rtx_IOR (SImode, copy_rtx (temp),
9948 GEN_INT (c & 0xffff))));
9949 break;
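 /* The SImode arm above is the classic two-insn sequence; for
 c = 0x12345678 it emits
 lis rT, 0x1234		# rT = 0x12340000
 ori rD, rT, 0x5678	# rD = 0x12345678  */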
9951 case DImode:
9952 if (!TARGET_POWERPC64)
9954 rtx hi, lo;
9956 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9957 DImode);
9958 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9959 DImode);
9960 emit_move_insn (hi, GEN_INT (c >> 32));
9961 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9962 emit_move_insn (lo, GEN_INT (c));
9964 else
9965 rs6000_emit_set_long_const (dest, c);
9966 break;
9968 default:
9969 gcc_unreachable ();
9972 insn = get_last_insn ();
9973 set = single_set (insn);
9974 if (! CONSTANT_P (SET_SRC (set)))
9975 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9977 return true;
9980 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9981 Output insns to set DEST equal to the constant C as a series of
9982 lis, ori and shl instructions. */
9984 static void
9985 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9987 rtx temp;
9988 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9990 ud1 = c & 0xffff;
9991 c = c >> 16;
9992 ud2 = c & 0xffff;
9993 c = c >> 16;
9994 ud3 = c & 0xffff;
9995 c = c >> 16;
9996 ud4 = c & 0xffff;
9998 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9999 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10000 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10002 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10003 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10005 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10007 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10008 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10009 if (ud1 != 0)
10010 emit_move_insn (dest,
10011 gen_rtx_IOR (DImode, copy_rtx (temp),
10012 GEN_INT (ud1)));
10014 else if (ud3 == 0 && ud4 == 0)
10016 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10018 gcc_assert (ud2 & 0x8000);
10019 emit_move_insn (copy_rtx (temp),
10020 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10021 if (ud1 != 0)
10022 emit_move_insn (copy_rtx (temp),
10023 gen_rtx_IOR (DImode, copy_rtx (temp),
10024 GEN_INT (ud1)));
10025 emit_move_insn (dest,
10026 gen_rtx_ZERO_EXTEND (DImode,
10027 gen_lowpart (SImode,
10028 copy_rtx (temp))));
10030 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10031 || (ud4 == 0 && ! (ud3 & 0x8000)))
10033 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10035 emit_move_insn (copy_rtx (temp),
10036 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10037 if (ud2 != 0)
10038 emit_move_insn (copy_rtx (temp),
10039 gen_rtx_IOR (DImode, copy_rtx (temp),
10040 GEN_INT (ud2)));
10041 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10042 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10043 GEN_INT (16)));
10044 if (ud1 != 0)
10045 emit_move_insn (dest,
10046 gen_rtx_IOR (DImode, copy_rtx (temp),
10047 GEN_INT (ud1)));
10049 else
10051 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10053 emit_move_insn (copy_rtx (temp),
10054 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10055 if (ud3 != 0)
10056 emit_move_insn (copy_rtx (temp),
10057 gen_rtx_IOR (DImode, copy_rtx (temp),
10058 GEN_INT (ud3)));
10060 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10061 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10062 GEN_INT (32)));
10063 if (ud2 != 0)
10064 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10065 gen_rtx_IOR (DImode, copy_rtx (temp),
10066 GEN_INT (ud2 << 16)));
10067 if (ud1 != 0)
10068 emit_move_insn (dest,
10069 gen_rtx_IOR (DImode, copy_rtx (temp),
10070 GEN_INT (ud1)));
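 /* In the worst case the arm above expands to the familiar five-insn
 sequence; for c = 0x123456789abcdef0 (ud4..ud1 = 0x1234, 0x5678,
 0x9abc, 0xdef0) it emits, using one register for brevity,
 lis  rD, 0x1234
 ori  rD, rD, 0x5678
 sldi rD, rD, 32
 oris rD, rD, 0x9abc
 ori  rD, rD, 0xdef0
 The earlier arms peel off shorter sequences when the upper
 halfwords are merely sign or zero extension of the rest. */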
10074 /* Helper for the following. Get rid of [r+r] memory refs
10075 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10077 static void
10078 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10080 if (reload_in_progress)
10081 return;
10083 if (GET_CODE (operands[0]) == MEM
10084 && GET_CODE (XEXP (operands[0], 0)) != REG
10085 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10086 GET_MODE (operands[0]), false))
10087 operands[0]
10088 = replace_equiv_address (operands[0],
10089 copy_addr_to_reg (XEXP (operands[0], 0)));
10091 if (GET_CODE (operands[1]) == MEM
10092 && GET_CODE (XEXP (operands[1], 0)) != REG
10093 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10094 GET_MODE (operands[1]), false))
10095 operands[1]
10096 = replace_equiv_address (operands[1],
10097 copy_addr_to_reg (XEXP (operands[1], 0)));
10100 /* Generate a vector of constants to permute MODE for a little-endian
10101 storage operation by swapping the two halves of a vector. */
10102 static rtvec
10103 rs6000_const_vec (machine_mode mode)
10105 int i, subparts;
10106 rtvec v;
10108 switch (mode)
10110 case V1TImode:
10111 subparts = 1;
10112 break;
10113 case V2DFmode:
10114 case V2DImode:
10115 subparts = 2;
10116 break;
10117 case V4SFmode:
10118 case V4SImode:
10119 subparts = 4;
10120 break;
10121 case V8HImode:
10122 subparts = 8;
10123 break;
10124 case V16QImode:
10125 subparts = 16;
10126 break;
10127 default:
10128 gcc_unreachable();
10131 v = rtvec_alloc (subparts);
10133 for (i = 0; i < subparts / 2; ++i)
10134 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10135 for (i = subparts / 2; i < subparts; ++i)
10136 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10138 return v;
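 /* For V4SImode (subparts == 4) this yields the selector { 2, 3, 0, 1 },
 and for V16QImode { 8 ... 15, 0 ... 7 }: in every case the two
 64-bit halves of the vector trade places, which is exactly the
 element reordering lxvd2x/stxvd2x perform on little-endian. */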
10141 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10142 for a VSX load or store operation. */
10143 rtx
10144 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10146 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10147 128-bit integers if they are allowed in VSX registers. */
10148 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
10149 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10150 else
10152 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10153 return gen_rtx_VEC_SELECT (mode, source, par);
10157 /* Emit a little-endian load from vector memory location SOURCE to VSX
10158 register DEST in mode MODE. The load is done with two permuting
10159 insns that represent an lxvd2x and xxpermdi. */
10160 void
10161 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10163 rtx tmp, permute_mem, permute_reg;
10165 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10166 V1TImode). */
10167 if (mode == TImode || mode == V1TImode)
10169 mode = V2DImode;
10170 dest = gen_lowpart (V2DImode, dest);
10171 source = adjust_address (source, V2DImode, 0);
10174 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10175 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10176 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10177 emit_insn (gen_rtx_SET (tmp, permute_mem));
10178 emit_insn (gen_rtx_SET (dest, permute_reg));
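 /* The two sets above correspond to
 lxvd2x	  vTMP, <source>	# doublewords arrive swapped
 xxpermdi vDEST, vTMP, vTMP, 2	# swap them back
 so DEST ends up in correct little-endian element order; redundant
 swap pairs created this way are removed later by the swap
 optimization pass. */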
10181 /* Emit a little-endian store to vector memory location DEST from VSX
10182 register SOURCE in mode MODE. The store is done with two permuting
10183 insns that represent an xxpermdi and an stxvd2x. */
10184 void
10185 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10187 rtx tmp, permute_src, permute_tmp;
10189 /* This should never be called during or after reload, because it does
10190 not re-permute the source register. It is intended only for use
10191 during expand. */
10192 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10194 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10195 V1TImode). */
10196 if (mode == TImode || mode == V1TImode)
10198 mode = V2DImode;
10199 dest = adjust_address (dest, V2DImode, 0);
10200 source = gen_lowpart (V2DImode, source);
10203 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10204 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10205 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10206 emit_insn (gen_rtx_SET (tmp, permute_src));
10207 emit_insn (gen_rtx_SET (dest, permute_tmp));
10210 /* Emit a sequence representing a little-endian VSX load or store,
10211 moving data from SOURCE to DEST in mode MODE. This is done
10212 separately from rs6000_emit_move to ensure it is called only
10213 during expand. LE VSX loads and stores introduced later are
10214 handled with a split. The expand-time RTL generation allows
10215 us to optimize away redundant pairs of register-permutes. */
10216 void
10217 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10219 gcc_assert (!BYTES_BIG_ENDIAN
10220 && VECTOR_MEM_VSX_P (mode)
10221 && !TARGET_P9_VECTOR
10222 && !gpr_or_gpr_p (dest, source)
10223 && (MEM_P (source) ^ MEM_P (dest)));
10225 if (MEM_P (source))
10227 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10228 rs6000_emit_le_vsx_load (dest, source, mode);
10230 else
10232 if (!REG_P (source))
10233 source = force_reg (mode, source);
10234 rs6000_emit_le_vsx_store (dest, source, mode);
10238 /* Emit a move from SOURCE to DEST in mode MODE. */
10239 void
10240 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10242 rtx operands[2];
10243 operands[0] = dest;
10244 operands[1] = source;
10246 if (TARGET_DEBUG_ADDR)
10248 fprintf (stderr,
10249 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10250 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10251 GET_MODE_NAME (mode),
10252 reload_in_progress,
10253 reload_completed,
10254 can_create_pseudo_p ());
10255 debug_rtx (dest);
10256 fprintf (stderr, "source:\n");
10257 debug_rtx (source);
10260 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10261 if (CONST_WIDE_INT_P (operands[1])
10262 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10264 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10265 gcc_unreachable ();
10268 /* Check if GCC is setting up a block move that will end up using FP
10269 registers as temporaries. We must make sure this is acceptable. */
10270 if (GET_CODE (operands[0]) == MEM
10271 && GET_CODE (operands[1]) == MEM
10272 && mode == DImode
10273 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10274 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10275 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10276 ? 32 : MEM_ALIGN (operands[0])))
10277 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10278 ? 32
10279 : MEM_ALIGN (operands[1]))))
10280 && ! MEM_VOLATILE_P (operands [0])
10281 && ! MEM_VOLATILE_P (operands [1]))
10283 emit_move_insn (adjust_address (operands[0], SImode, 0),
10284 adjust_address (operands[1], SImode, 0));
10285 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10286 adjust_address (copy_rtx (operands[1]), SImode, 4));
10287 return;
10290 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10291 && !gpc_reg_operand (operands[1], mode))
10292 operands[1] = force_reg (mode, operands[1]);
10294 /* Recognize the case where operand[1] is a reference to thread-local
10295 data and load its address to a register. */
10296 if (tls_referenced_p (operands[1]))
10298 enum tls_model model;
10299 rtx tmp = operands[1];
10300 rtx addend = NULL;
10302 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10304 addend = XEXP (XEXP (tmp, 0), 1);
10305 tmp = XEXP (XEXP (tmp, 0), 0);
10308 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10309 model = SYMBOL_REF_TLS_MODEL (tmp);
10310 gcc_assert (model != 0);
10312 tmp = rs6000_legitimize_tls_address (tmp, model);
10313 if (addend)
10315 tmp = gen_rtx_PLUS (mode, tmp, addend);
10316 tmp = force_operand (tmp, operands[0]);
10318 operands[1] = tmp;
10321 /* Handle the case where reload calls us with an invalid address. */
10322 if (reload_in_progress && mode == Pmode
10323 && (! general_operand (operands[1], mode)
10324 || ! nonimmediate_operand (operands[0], mode)))
10325 goto emit_set;
10327 /* 128-bit constant floating-point values on Darwin should really be loaded
10328 as two parts. However, this premature splitting is a problem when DFmode
10329 values can go into Altivec registers. */
10330 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10331 && GET_CODE (operands[1]) == CONST_DOUBLE)
10333 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10334 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10335 DFmode);
10336 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10337 GET_MODE_SIZE (DFmode)),
10338 simplify_gen_subreg (DFmode, operands[1], mode,
10339 GET_MODE_SIZE (DFmode)),
10340 DFmode);
10341 return;
10344 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10345 cfun->machine->sdmode_stack_slot =
10346 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10349 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10350 p1:SD) if p1 is not of floating point class and p0 is spilled as
10351 we can have no analogous movsd_store for this. */
10352 if (lra_in_progress && mode == DDmode
10353 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10354 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10355 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10356 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10358 enum reg_class cl;
10359 int regno = REGNO (SUBREG_REG (operands[1]));
10361 if (regno >= FIRST_PSEUDO_REGISTER)
10363 cl = reg_preferred_class (regno);
10364 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10366 if (regno >= 0 && ! FP_REGNO_P (regno))
10368 mode = SDmode;
10369 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10370 operands[1] = SUBREG_REG (operands[1]);
10373 if (lra_in_progress
10374 && mode == SDmode
10375 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10376 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10377 && (REG_P (operands[1])
10378 || (GET_CODE (operands[1]) == SUBREG
10379 && REG_P (SUBREG_REG (operands[1])))))
10381 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10382 ? SUBREG_REG (operands[1]) : operands[1]);
10383 enum reg_class cl;
10385 if (regno >= FIRST_PSEUDO_REGISTER)
10387 cl = reg_preferred_class (regno);
10388 gcc_assert (cl != NO_REGS);
10389 regno = ira_class_hard_regs[cl][0];
10391 if (FP_REGNO_P (regno))
10393 if (GET_MODE (operands[0]) != DDmode)
10394 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10395 emit_insn (gen_movsd_store (operands[0], operands[1]));
10397 else if (INT_REGNO_P (regno))
10398 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10399 else
10400 gcc_unreachable();
10401 return;
10403 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10404 p:DD)) if p0 is not of floating point class and p1 is spilled as
10405 we can have no analogous movsd_load for this. */
10406 if (lra_in_progress && mode == DDmode
10407 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10408 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10409 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10410 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10412 enum reg_class cl;
10413 int regno = REGNO (SUBREG_REG (operands[0]));
10415 if (regno >= FIRST_PSEUDO_REGISTER)
10417 cl = reg_preferred_class (regno);
10418 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10420 if (regno >= 0 && ! FP_REGNO_P (regno))
10422 mode = SDmode;
10423 operands[0] = SUBREG_REG (operands[0]);
10424 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10427 if (lra_in_progress
10428 && mode == SDmode
10429 && (REG_P (operands[0])
10430 || (GET_CODE (operands[0]) == SUBREG
10431 && REG_P (SUBREG_REG (operands[0]))))
10432 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10433 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10435 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10436 ? SUBREG_REG (operands[0]) : operands[0]);
10437 enum reg_class cl;
10439 if (regno >= FIRST_PSEUDO_REGISTER)
10441 cl = reg_preferred_class (regno);
10442 gcc_assert (cl != NO_REGS);
10443 regno = ira_class_hard_regs[cl][0];
10445 if (FP_REGNO_P (regno))
10447 if (GET_MODE (operands[1]) != DDmode)
10448 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10449 emit_insn (gen_movsd_load (operands[0], operands[1]));
10451 else if (INT_REGNO_P (regno))
10452 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10453 else
10454 gcc_unreachable();
10455 return;
10458 if (reload_in_progress
10459 && mode == SDmode
10460 && cfun->machine->sdmode_stack_slot != NULL_RTX
10461 && MEM_P (operands[0])
10462 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10463 && REG_P (operands[1]))
10465 if (FP_REGNO_P (REGNO (operands[1])))
10467 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10468 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10469 emit_insn (gen_movsd_store (mem, operands[1]));
10471 else if (INT_REGNO_P (REGNO (operands[1])))
10473 rtx mem = operands[0];
10474 if (BYTES_BIG_ENDIAN)
10475 mem = adjust_address_nv (mem, mode, 4);
10476 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10477 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10479 else
10480 gcc_unreachable();
10481 return;
10483 if (reload_in_progress
10484 && mode == SDmode
10485 && REG_P (operands[0])
10486 && MEM_P (operands[1])
10487 && cfun->machine->sdmode_stack_slot != NULL_RTX
10488 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10490 if (FP_REGNO_P (REGNO (operands[0])))
10492 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10493 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10494 emit_insn (gen_movsd_load (operands[0], mem));
10496 else if (INT_REGNO_P (REGNO (operands[0])))
10498 rtx mem = operands[1];
10499 if (BYTES_BIG_ENDIAN)
10500 mem = adjust_address_nv (mem, mode, 4);
10501 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10502 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10504 else
10505 gcc_unreachable();
10506 return;
10509 /* FIXME: In the long term, this switch statement should go away
10510 and be replaced by a sequence of tests based on things like
10511 mode == Pmode. */
10512 switch (mode)
10514 case HImode:
10515 case QImode:
10516 if (CONSTANT_P (operands[1])
10517 && GET_CODE (operands[1]) != CONST_INT)
10518 operands[1] = force_const_mem (mode, operands[1]);
10519 break;
10521 case TFmode:
10522 case TDmode:
10523 case IFmode:
10524 case KFmode:
10525 if (FLOAT128_2REG_P (mode))
10526 rs6000_eliminate_indexed_memrefs (operands);
10527 /* fall through */
10529 case DFmode:
10530 case DDmode:
10531 case SFmode:
10532 case SDmode:
10533 if (CONSTANT_P (operands[1])
10534 && ! easy_fp_constant (operands[1], mode))
10535 operands[1] = force_const_mem (mode, operands[1]);
10536 break;
10538 case V16QImode:
10539 case V8HImode:
10540 case V4SFmode:
10541 case V4SImode:
10542 case V4HImode:
10543 case V2SFmode:
10544 case V2SImode:
10545 case V1DImode:
10546 case V2DFmode:
10547 case V2DImode:
10548 case V1TImode:
10549 if (CONSTANT_P (operands[1])
10550 && !easy_vector_constant (operands[1], mode))
10551 operands[1] = force_const_mem (mode, operands[1]);
10552 break;
10554 case SImode:
10555 case DImode:
10556 /* Use default pattern for address of ELF small data. */
10557 if (TARGET_ELF
10558 && mode == Pmode
10559 && DEFAULT_ABI == ABI_V4
10560 && (GET_CODE (operands[1]) == SYMBOL_REF
10561 || GET_CODE (operands[1]) == CONST)
10562 && small_data_operand (operands[1], mode))
10564 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10565 return;
10568 if (DEFAULT_ABI == ABI_V4
10569 && mode == Pmode && mode == SImode
10570 && flag_pic == 1 && got_operand (operands[1], mode))
10572 emit_insn (gen_movsi_got (operands[0], operands[1]));
10573 return;
10576 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10577 && TARGET_NO_TOC
10578 && ! flag_pic
10579 && mode == Pmode
10580 && CONSTANT_P (operands[1])
10581 && GET_CODE (operands[1]) != HIGH
10582 && GET_CODE (operands[1]) != CONST_INT)
10584 rtx target = (!can_create_pseudo_p ()
10585 ? operands[0]
10586 : gen_reg_rtx (mode));
10588 /* If this is a function address on -mcall-aixdesc,
10589 convert it to the address of the descriptor. */
10590 if (DEFAULT_ABI == ABI_AIX
10591 && GET_CODE (operands[1]) == SYMBOL_REF
10592 && XSTR (operands[1], 0)[0] == '.')
10594 const char *name = XSTR (operands[1], 0);
10595 rtx new_ref;
10596 while (*name == '.')
10597 name++;
10598 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10599 CONSTANT_POOL_ADDRESS_P (new_ref)
10600 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10601 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10602 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10603 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10604 operands[1] = new_ref;
10607 if (DEFAULT_ABI == ABI_DARWIN)
10609 #if TARGET_MACHO
10610 if (MACHO_DYNAMIC_NO_PIC_P)
10612 /* Take care of any required data indirection. */
10613 operands[1] = rs6000_machopic_legitimize_pic_address (
10614 operands[1], mode, operands[0]);
10615 if (operands[0] != operands[1])
10616 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10617 return;
10619 #endif
10620 emit_insn (gen_macho_high (target, operands[1]));
10621 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10622 return;
10625 emit_insn (gen_elf_high (target, operands[1]));
10626 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10627 return;
10630 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10631 and we have put it in the TOC, we just need to make a TOC-relative
10632 reference to it. */
10633 if (TARGET_TOC
10634 && GET_CODE (operands[1]) == SYMBOL_REF
10635 && use_toc_relative_ref (operands[1], mode))
10636 operands[1] = create_TOC_reference (operands[1], operands[0]);
10637 else if (mode == Pmode
10638 && CONSTANT_P (operands[1])
10639 && GET_CODE (operands[1]) != HIGH
10640 && ((GET_CODE (operands[1]) != CONST_INT
10641 && ! easy_fp_constant (operands[1], mode))
10642 || (GET_CODE (operands[1]) == CONST_INT
10643 && (num_insns_constant (operands[1], mode)
10644 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10645 || (GET_CODE (operands[0]) == REG
10646 && FP_REGNO_P (REGNO (operands[0]))))
10647 && !toc_relative_expr_p (operands[1], false)
10648 && (TARGET_CMODEL == CMODEL_SMALL
10649 || can_create_pseudo_p ()
10650 || (REG_P (operands[0])
10651 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10654 #if TARGET_MACHO
10655 /* Darwin uses a special PIC legitimizer. */
10656 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10658 operands[1] =
10659 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10660 operands[0]);
10661 if (operands[0] != operands[1])
10662 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10663 return;
10665 #endif
10667 /* If we are to limit the number of things we put in the TOC and
10668 this is a symbol plus a constant we can add in one insn,
10669 just put the symbol in the TOC and add the constant. Don't do
10670 this if reload is in progress. */
10671 if (GET_CODE (operands[1]) == CONST
10672 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10673 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10674 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10675 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10676 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10677 && ! side_effects_p (operands[0]))
10679 rtx sym =
10680 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10681 rtx other = XEXP (XEXP (operands[1], 0), 1);
10683 sym = force_reg (mode, sym);
10684 emit_insn (gen_add3_insn (operands[0], sym, other));
10685 return;
10688 operands[1] = force_const_mem (mode, operands[1]);
10690 if (TARGET_TOC
10691 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10692 && constant_pool_expr_p (XEXP (operands[1], 0))
10693 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10694 get_pool_constant (XEXP (operands[1], 0)),
10695 get_pool_mode (XEXP (operands[1], 0))))
10697 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10698 operands[0]);
10699 operands[1] = gen_const_mem (mode, tocref);
10700 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10703 break;
10705 case TImode:
10706 if (!VECTOR_MEM_VSX_P (TImode))
10707 rs6000_eliminate_indexed_memrefs (operands);
10708 break;
10710 case PTImode:
10711 rs6000_eliminate_indexed_memrefs (operands);
10712 break;
10714 default:
10715 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10718 /* Above, we may have called force_const_mem which may have returned
10719 an invalid address. If we can, fix this up; otherwise, reload will
10720 have to deal with it. */
10721 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10722 operands[1] = validize_mem (operands[1]);
10724 emit_set:
10725 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10728 /* Return true if a structure, union or array containing FIELD should be
10729 accessed using `BLKMODE'.
10731 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10732 entire thing in a DI and use subregs to access the internals.
10733 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10734 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10735 best thing to do is set structs to BLKmode and avoid Severe Tire
10736 Damage.
10738 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10739 fit into 1, whereas DI still needs two. */
10741 static bool
10742 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10744 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10745 || (TARGET_E500_DOUBLE && mode == DFmode));
10748 /* Nonzero if we can use a floating-point register to pass this arg. */
10749 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10750 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10751 && (CUM)->fregno <= FP_ARG_MAX_REG \
10752 && TARGET_HARD_FLOAT && TARGET_FPRS)
10754 /* Nonzero if we can use an AltiVec register to pass this arg. */
10755 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10756 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10757 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10758 && TARGET_ALTIVEC_ABI \
10759 && (NAMED))
10761 /* Walk down the type tree of TYPE counting consecutive base elements.
10762 If *MODEP is VOIDmode, then set it to the first valid floating point
10763 or vector type. If a non-floating point or vector type is found, or
10764 if a floating point or vector type that doesn't match a non-VOIDmode
10765 *MODEP is found, then return -1, otherwise return the count in the
10766 sub-tree. */
10768 static int
10769 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10771 machine_mode mode;
10772 HOST_WIDE_INT size;
10774 switch (TREE_CODE (type))
10776 case REAL_TYPE:
10777 mode = TYPE_MODE (type);
10778 if (!SCALAR_FLOAT_MODE_P (mode))
10779 return -1;
10781 if (*modep == VOIDmode)
10782 *modep = mode;
10784 if (*modep == mode)
10785 return 1;
10787 break;
10789 case COMPLEX_TYPE:
10790 mode = TYPE_MODE (TREE_TYPE (type));
10791 if (!SCALAR_FLOAT_MODE_P (mode))
10792 return -1;
10794 if (*modep == VOIDmode)
10795 *modep = mode;
10797 if (*modep == mode)
10798 return 2;
10800 break;
10802 case VECTOR_TYPE:
10803 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10804 return -1;
10806 /* Use V4SImode as representative of all 128-bit vector types. */
10807 size = int_size_in_bytes (type);
10808 switch (size)
10810 case 16:
10811 mode = V4SImode;
10812 break;
10813 default:
10814 return -1;
10817 if (*modep == VOIDmode)
10818 *modep = mode;
10820 /* Vector modes are considered to be opaque: two vectors are
10821 equivalent for the purposes of being homogeneous aggregates
10822 if they are the same size. */
10823 if (*modep == mode)
10824 return 1;
10826 break;
10828 case ARRAY_TYPE:
10830 int count;
10831 tree index = TYPE_DOMAIN (type);
10833 /* Can't handle incomplete types nor sizes that are not
10834 fixed. */
10835 if (!COMPLETE_TYPE_P (type)
10836 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10837 return -1;
10839 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10840 if (count == -1
10841 || !index
10842 || !TYPE_MAX_VALUE (index)
10843 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10844 || !TYPE_MIN_VALUE (index)
10845 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10846 || count < 0)
10847 return -1;
10849 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10850 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10852 /* There must be no padding. */
10853 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10854 return -1;
10856 return count;
10859 case RECORD_TYPE:
10861 int count = 0;
10862 int sub_count;
10863 tree field;
10865 /* Can't handle incomplete types nor sizes that are not
10866 fixed. */
10867 if (!COMPLETE_TYPE_P (type)
10868 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10869 return -1;
10871 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10873 if (TREE_CODE (field) != FIELD_DECL)
10874 continue;
10876 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10877 if (sub_count < 0)
10878 return -1;
10879 count += sub_count;
10882 /* There must be no padding. */
10883 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10884 return -1;
10886 return count;
10889 case UNION_TYPE:
10890 case QUAL_UNION_TYPE:
10892 /* These aren't very interesting except in a degenerate case. */
10893 int count = 0;
10894 int sub_count;
10895 tree field;
10897 /* Can't handle incomplete types nor sizes that are not
10898 fixed. */
10899 if (!COMPLETE_TYPE_P (type)
10900 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10901 return -1;
10903 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10905 if (TREE_CODE (field) != FIELD_DECL)
10906 continue;
10908 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10909 if (sub_count < 0)
10910 return -1;
10911 count = count > sub_count ? count : sub_count;
10914 /* There must be no padding. */
10915 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10916 return -1;
10918 return count;
10921 default:
10922 break;
10925 return -1;
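 /* Illustrative inputs: the walk accepts
 struct { double re, im; };	-> *MODEP = DFmode, count 2
 struct { float v[3]; };	-> *MODEP = SFmode, count 3
 but rejects mixed element types such as
 struct { float f; double d; };	-> -1
 because the second field's mode no longer matches *MODEP. */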
10928 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10929 float or vector aggregate that shall be passed in FP/vector registers
10930 according to the ELFv2 ABI, return the homogeneous element mode in
10931 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10933 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10935 static bool
10936 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10937 machine_mode *elt_mode,
10938 int *n_elts)
10940 /* Note that we do not accept complex types at the top level as
10941 homogeneous aggregates; these types are handled via the
10942 targetm.calls.split_complex_arg mechanism. Complex types
10943 can be elements of homogeneous aggregates, however. */
10944 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10946 machine_mode field_mode = VOIDmode;
10947 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10949 if (field_count > 0)
10951 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10952 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10954 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10955 up to AGGR_ARG_NUM_REG registers. */
10956 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10958 if (elt_mode)
10959 *elt_mode = field_mode;
10960 if (n_elts)
10961 *n_elts = field_count;
10962 return true;
10967 if (elt_mode)
10968 *elt_mode = mode;
10969 if (n_elts)
10970 *n_elts = 1;
10971 return false;
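 /* Under the ELFv2 ABI this classifies, for instance,
 struct { double x, y, z, w; }
 as homogeneous with *ELT_MODE == DFmode and *N_ELTS == 4, so the
 value is passed in four consecutive FPRs; once field_count * n_regs
 exceeds AGGR_ARG_NUM_REG the test above fails and the aggregate is
 passed the ordinary way. */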
10974 /* Return a nonzero value to say to return the function value in
10975 memory, just as large structures are always returned. TYPE will be
10976 the data type of the value, and FNTYPE will be the type of the
10977 function doing the returning, or @code{NULL} for libcalls.
10979 The AIX ABI for the RS/6000 specifies that all structures are
10980 returned in memory. The Darwin ABI does the same.
10982 For the Darwin 64 Bit ABI, a function result can be returned in
10983 registers or in memory, depending on the size of the return data
10984 type. If it is returned in registers, the value occupies the same
10985 registers as it would if it were the first and only function
10986 argument. Otherwise, the function places its result in memory at
10987 the location pointed to by GPR3.
10989 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10990 but a draft put them in memory, and GCC used to implement the draft
10991 instead of the final standard. Therefore, aix_struct_return
10992 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10993 compatibility can change DRAFT_V4_STRUCT_RET to override the
10994 default, and -m switches get the final word. See
10995 rs6000_option_override_internal for more details.
10997 The PPC32 SVR4 ABI uses IEEE 128-bit floating point for long double,
10998 if 128-bit long double support is enabled. These values are returned in memory.
11000 int_size_in_bytes returns -1 for variable size objects, which go in
11001 memory always. The cast to unsigned makes -1 > 8. */
11003 static bool
11004 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11006 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11007 if (TARGET_MACHO
11008 && rs6000_darwin64_abi
11009 && TREE_CODE (type) == RECORD_TYPE
11010 && int_size_in_bytes (type) > 0)
11012 CUMULATIVE_ARGS valcum;
11013 rtx valret;
11015 valcum.words = 0;
11016 valcum.fregno = FP_ARG_MIN_REG;
11017 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11018 /* Do a trial code generation as if this were going to be passed
11019 as an argument; if any part goes in memory, we return NULL. */
11020 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11021 if (valret)
11022 return false;
11023 /* Otherwise fall through to more conventional ABI rules. */
11026 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11027 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11028 NULL, NULL))
11029 return false;
11031 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11032 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11033 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11034 return false;
11036 if (AGGREGATE_TYPE_P (type)
11037 && (aix_struct_return
11038 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11039 return true;
11041 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11042 modes only exist for GCC vector types if -maltivec. */
11043 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11044 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11045 return false;
11047 /* Return synthetic vectors in memory. */
11048 if (TREE_CODE (type) == VECTOR_TYPE
11049 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11051 static bool warned_for_return_big_vectors = false;
11052 if (!warned_for_return_big_vectors)
11054 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11055 "non-standard ABI extension with no compatibility guarantee");
11056 warned_for_return_big_vectors = true;
11058 return true;
11061 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11062 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11063 return true;
11065 return false;
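/* Worked examples (illustrative, assuming the checks above; not from
   the original source): under ELFv2, struct { double x, y; } is a
   homogeneous aggregate and is returned in FPRs; struct { char c[16]; }
   is an aggregate of at most 16 bytes and is returned in GPRs;
   struct { char c[24]; } goes in memory.  With aix_struct_return set,
   all three are returned in memory.  */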
11068 /* Specify whether values returned in registers should be at the most
11069 significant end of a register. We want aggregates returned by
11070 value to match the way aggregates are passed to functions. */
11072 static bool
11073 rs6000_return_in_msb (const_tree valtype)
11075 return (DEFAULT_ABI == ABI_ELFv2
11076 && BYTES_BIG_ENDIAN
11077 && AGGREGATE_TYPE_P (valtype)
11078 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11081 #ifdef HAVE_AS_GNU_ATTRIBUTE
11082 /* Return TRUE if a call to function FNDECL may affect the
11083 function-calling ABI recorded for the object file. */
11085 static bool
11086 call_ABI_of_interest (tree fndecl)
11088 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11090 struct cgraph_node *c_node;
11092 /* Libcalls are always interesting. */
11093 if (fndecl == NULL_TREE)
11094 return true;
11096 /* Any call to an external function is interesting. */
11097 if (DECL_EXTERNAL (fndecl))
11098 return true;
11100 /* Interesting functions that we are emitting in this object file. */
11101 c_node = cgraph_node::get (fndecl);
11102 c_node = c_node->ultimate_alias_target ();
11103 return !c_node->only_called_directly_p ();
11105 return false;
11107 #endif
11109 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11110 for a call to a function whose data type is FNTYPE.
11111 For a library call, FNTYPE is 0 and RETURN_MODE is the mode of the return value.
11113 For incoming args we set the prototype argument count high enough that
11114 we never return a PARALLEL. */
11116 void
11117 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11118 rtx libname ATTRIBUTE_UNUSED, int incoming,
11119 int libcall, int n_named_args,
11120 tree fndecl ATTRIBUTE_UNUSED,
11121 machine_mode return_mode ATTRIBUTE_UNUSED)
11123 static CUMULATIVE_ARGS zero_cumulative;
11125 *cum = zero_cumulative;
11126 cum->words = 0;
11127 cum->fregno = FP_ARG_MIN_REG;
11128 cum->vregno = ALTIVEC_ARG_MIN_REG;
11129 cum->prototype = (fntype && prototype_p (fntype));
11130 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11131 ? CALL_LIBCALL : CALL_NORMAL);
11132 cum->sysv_gregno = GP_ARG_MIN_REG;
11133 cum->stdarg = stdarg_p (fntype);
11134 cum->libcall = libcall;
11136 cum->nargs_prototype = 0;
11137 if (incoming || cum->prototype)
11138 cum->nargs_prototype = n_named_args;
11140 /* Check for a longcall attribute. */
11141 if ((!fntype && rs6000_default_long_calls)
11142 || (fntype
11143 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11144 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11145 cum->call_cookie |= CALL_LONG;
11147 if (TARGET_DEBUG_ARG)
11149 fprintf (stderr, "\ninit_cumulative_args:");
11150 if (fntype)
11152 tree ret_type = TREE_TYPE (fntype);
11153 fprintf (stderr, " ret code = %s,",
11154 get_tree_code_name (TREE_CODE (ret_type)));
11157 if (cum->call_cookie & CALL_LONG)
11158 fprintf (stderr, " longcall,");
11160 fprintf (stderr, " proto = %d, nargs = %d\n",
11161 cum->prototype, cum->nargs_prototype);
11164 #ifdef HAVE_AS_GNU_ATTRIBUTE
11165 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11167 cum->escapes = call_ABI_of_interest (fndecl);
11168 if (cum->escapes)
11170 tree return_type;
11172 if (fntype)
11174 return_type = TREE_TYPE (fntype);
11175 return_mode = TYPE_MODE (return_type);
11177 else
11178 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11180 if (return_type != NULL)
11182 if (TREE_CODE (return_type) == RECORD_TYPE
11183 && TYPE_TRANSPARENT_AGGR (return_type))
11185 return_type = TREE_TYPE (first_field (return_type));
11186 return_mode = TYPE_MODE (return_type);
11188 if (AGGREGATE_TYPE_P (return_type)
11189 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11190 <= 8))
11191 rs6000_returns_struct = true;
11193 if (SCALAR_FLOAT_MODE_P (return_mode))
11195 rs6000_passes_float = true;
11196 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11197 && (FLOAT128_IBM_P (return_mode)
11198 || FLOAT128_IEEE_P (return_mode)
11199 || (return_type != NULL
11200 && (TYPE_MAIN_VARIANT (return_type)
11201 == long_double_type_node))))
11202 rs6000_passes_long_double = true;
11204 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11205 || SPE_VECTOR_MODE (return_mode))
11206 rs6000_passes_vector = true;
11209 #endif
11211 if (fntype
11212 && !TARGET_ALTIVEC
11213 && TARGET_ALTIVEC_ABI
11214 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11216 error ("cannot return value in vector register because"
11217 " altivec instructions are disabled, use -maltivec"
11218 " to enable them");
11222 /* The mode the ABI uses for a word. This is not the same as word_mode
11223 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11225 static machine_mode
11226 rs6000_abi_word_mode (void)
11228 return TARGET_32BIT ? SImode : DImode;
11231 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11232 static char *
11233 rs6000_offload_options (void)
11235 if (TARGET_64BIT)
11236 return xstrdup ("-foffload-abi=lp64");
11237 else
11238 return xstrdup ("-foffload-abi=ilp32");
11241 /* On rs6000, function arguments are promoted, as are function return
11242 values. */
11244 static machine_mode
11245 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11246 machine_mode mode,
11247 int *punsignedp ATTRIBUTE_UNUSED,
11248 const_tree, int)
11250 PROMOTE_MODE (mode, *punsignedp, type);
11252 return mode;
11255 /* Return true if TYPE must be passed on the stack and not in registers. */
11257 static bool
11258 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11260 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11261 return must_pass_in_stack_var_size (mode, type);
11262 else
11263 return must_pass_in_stack_var_size_or_pad (mode, type);
11266 static inline bool
11267 is_complex_IBM_long_double (machine_mode mode)
11269 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11272 /* Whether ABI_V4 passes MODE args to a function in floating point
11273 registers. */
11275 static bool
11276 abi_v4_pass_in_fpr (machine_mode mode)
11278 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11279 return false;
11280 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11281 return true;
11282 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11283 return true;
11284 /* ABI_V4 passes complex IBM long double in 8 gprs.
11285 Stupid, but we can't change the ABI now. */
11286 if (is_complex_IBM_long_double (mode))
11287 return false;
11288 if (FLOAT128_2REG_P (mode))
11289 return true;
11290 if (DECIMAL_FLOAT_MODE_P (mode))
11291 return true;
11292 return false;
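/* Examples (illustrative, assuming hard float; not from the original
   source): SFmode qualifies with TARGET_SINGLE_FLOAT and DFmode with
   TARGET_DOUBLE_FLOAT; IBM extended TFmode (FLOAT128_2REG_P) and the
   decimal modes SDmode, DDmode and TDmode also go in FPRs.  Complex
   IBM long double (ICmode) is the historical exception and goes in
   eight GPRs, per the comment above.  */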
11295 /* If defined, a C expression which determines whether, and in which
11296 direction, to pad out an argument with extra space. The value
11297 should be of type `enum direction': either `upward' to pad above
11298 the argument, `downward' to pad below, or `none' to inhibit
11299 padding.
11301 For the AIX ABI, structs are always stored left-shifted in their
11302 argument slot. */
11304 enum direction
11305 function_arg_padding (machine_mode mode, const_tree type)
11307 #ifndef AGGREGATE_PADDING_FIXED
11308 #define AGGREGATE_PADDING_FIXED 0
11309 #endif
11310 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11311 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11312 #endif
11314 if (!AGGREGATE_PADDING_FIXED)
11316 /* GCC used to pass structures of the same size as integer types as
11317 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11318 That is, structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11319 passed padded downward, except that -mstrict-align further
11320 muddied the water in that multi-component structures of 2 and 4
11321 bytes in size were passed padded upward.
11323 The following arranges for best compatibility with previous
11324 versions of gcc, but removes the -mstrict-align dependency. */
11325 if (BYTES_BIG_ENDIAN)
11327 HOST_WIDE_INT size = 0;
11329 if (mode == BLKmode)
11331 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11332 size = int_size_in_bytes (type);
11334 else
11335 size = GET_MODE_SIZE (mode);
11337 if (size == 1 || size == 2 || size == 4)
11338 return downward;
11340 return upward;
11343 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11345 if (type != 0 && AGGREGATE_TYPE_P (type))
11346 return upward;
11349 /* Fall back to the default. */
11350 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
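/* A worked example (illustrative, not from the original source): on a
   big-endian target without AGGREGATE_PADDING_FIXED, a 2-byte struct
   is padded downward, i.e. placed at the least-significant end of its
   slot exactly as a short would be, while a 3-byte struct is padded
   upward and left-justified, matching its layout in memory.  */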
11353 /* If defined, a C expression that gives the alignment boundary, in bits,
11354 of an argument with the specified mode and type. If it is not defined,
11355 PARM_BOUNDARY is used for all arguments.
11357 V.4 wants long longs and doubles to be double word aligned. Just
11358 testing the mode size is a boneheaded way to do this as it means
11359 that other types such as complex int are also double word aligned.
11360 However, we're stuck with this because changing the ABI might break
11361 existing library interfaces.
11363 Doubleword align SPE vectors.
11364 Quadword align Altivec/VSX vectors.
11365 Quadword align large synthetic vector types. */
11367 static unsigned int
11368 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11370 machine_mode elt_mode;
11371 int n_elts;
11373 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11375 if (DEFAULT_ABI == ABI_V4
11376 && (GET_MODE_SIZE (mode) == 8
11377 || (TARGET_HARD_FLOAT
11378 && TARGET_FPRS
11379 && !is_complex_IBM_long_double (mode)
11380 && FLOAT128_2REG_P (mode))))
11381 return 64;
11382 else if (FLOAT128_VECTOR_P (mode))
11383 return 128;
11384 else if (SPE_VECTOR_MODE (mode)
11385 || (type && TREE_CODE (type) == VECTOR_TYPE
11386 && int_size_in_bytes (type) >= 8
11387 && int_size_in_bytes (type) < 16))
11388 return 64;
11389 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11390 || (type && TREE_CODE (type) == VECTOR_TYPE
11391 && int_size_in_bytes (type) >= 16))
11392 return 128;
11394 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11395 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11396 -mcompat-align-parm is used. */
11397 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11398 || DEFAULT_ABI == ABI_ELFv2)
11399 && type && TYPE_ALIGN (type) > 64)
11401 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11402 or homogeneous float/vector aggregates here. We already handled
11403 vector aggregates above, but still need to check for float here. */
11404 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11405 && !SCALAR_FLOAT_MODE_P (elt_mode));
11407 /* We used to check for BLKmode instead of the above aggregate type
11408 check. Warn when this results in any difference to the ABI. */
11409 if (aggregate_p != (mode == BLKmode))
11411 static bool warned;
11412 if (!warned && warn_psabi)
11414 warned = true;
11415 inform (input_location,
11416 "the ABI of passing aggregates with %d-byte alignment"
11417 " has changed in GCC 5",
11418 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11422 if (aggregate_p)
11423 return 128;
11426 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11427 implement the "aggregate type" check as a BLKmode check here; this
11428 means certain aggregate types are in fact not aligned. */
11429 if (TARGET_MACHO && rs6000_darwin64_abi
11430 && mode == BLKmode
11431 && type && TYPE_ALIGN (type) > 64)
11432 return 128;
11434 return PARM_BOUNDARY;
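/* Examples (illustrative, not from the original source): under ABI_V4
   a double or long long (an 8-byte mode) is aligned to 64 bits; an
   AltiVec vector, or under ELFv2 an aggregate declared with
   __attribute__ ((aligned (16))), is aligned to 128 bits; everything
   else falls back to PARM_BOUNDARY.  */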
11437 /* The offset in words to the start of the parameter save area. */
11439 static unsigned int
11440 rs6000_parm_offset (void)
11442 return (DEFAULT_ABI == ABI_V4 ? 2
11443 : DEFAULT_ABI == ABI_ELFv2 ? 4
11444 : 6);
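/* The constants above correspond to the stack frame header that sits
   below the parameter save area (stated here as ABI background, not
   taken from this file): 2 words under V.4 (back chain, LR save),
   4 under ELFv2 (back chain, CR save, LR save, TOC save), and 6 under
   AIX/ELFv1 (back chain, CR save, LR save, two reserved words, TOC
   save).  */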
11447 /* For a function parm of MODE and TYPE, return the starting word in
11448 the parameter area. NWORDS of the parameter area are already used. */
11450 static unsigned int
11451 rs6000_parm_start (machine_mode mode, const_tree type,
11452 unsigned int nwords)
11454 unsigned int align;
11456 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11457 return nwords + (-(rs6000_parm_offset () + nwords) & align);
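/* A worked example (illustrative, not from the original source): under
   64-bit ELFv2, a 16-byte-aligned argument arriving with NWORDS == 1
   gives align == 128 / PARM_BOUNDARY - 1 == 1, so the result is
   1 + (-(4 + 1) & 1) == 2; word 4 + 2 == 6 of the frame is 48 bytes
   in, which is the required 16-byte boundary.  */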
11460 /* Compute the size (in words) of a function argument. */
11462 static unsigned long
11463 rs6000_arg_size (machine_mode mode, const_tree type)
11465 unsigned long size;
11467 if (mode != BLKmode)
11468 size = GET_MODE_SIZE (mode);
11469 else
11470 size = int_size_in_bytes (type);
11472 if (TARGET_32BIT)
11473 return (size + 3) >> 2;
11474 else
11475 return (size + 7) >> 3;
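/* A worked example (illustrative, not from the original source): a
   10-byte BLKmode argument occupies (10 + 3) >> 2 == 3 words under
   -m32 and (10 + 7) >> 3 == 2 words under -m64.  */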
11478 /* Use this to flush pending int fields, adding the GPR words they occupy to CUM->words. */
11480 static void
11481 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11482 HOST_WIDE_INT bitpos, int final)
11484 unsigned int startbit, endbit;
11485 int intregs, intoffset;
11486 machine_mode mode;
11488 /* Handle the situations where a float is taking up the first half
11489 of the GPR, and the other half is empty (typically due to
11490 alignment restrictions). We can detect this by an 8-byte-aligned
11491 int field, or by seeing that this is the final flush for this
11492 argument. Count the word and continue on. */
11493 if (cum->floats_in_gpr == 1
11494 && (cum->intoffset % 64 == 0
11495 || (cum->intoffset == -1 && final)))
11497 cum->words++;
11498 cum->floats_in_gpr = 0;
11501 if (cum->intoffset == -1)
11502 return;
11504 intoffset = cum->intoffset;
11505 cum->intoffset = -1;
11506 cum->floats_in_gpr = 0;
11508 if (intoffset % BITS_PER_WORD != 0)
11510 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11511 MODE_INT, 0);
11512 if (mode == BLKmode)
11514 /* We couldn't find an appropriate mode, which happens,
11515 e.g., in packed structs when there are 3 bytes to load.
11516 Move intoffset back to the beginning of the word in this
11517 case. */
11518 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11522 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11523 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11524 intregs = (endbit - startbit) / BITS_PER_WORD;
11525 cum->words += intregs;
11526 /* words should be unsigned. */
11527 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11529 int pad = (endbit/BITS_PER_WORD) - cum->words;
11530 cum->words += pad;
11534 /* The darwin64 ABI calls for us to recurse down through structs,
11535 looking for elements passed in registers. Unfortunately, we have
11536 to track int register count here also because of misalignments
11537 in powerpc alignment mode. */
11539 static void
11540 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11541 const_tree type,
11542 HOST_WIDE_INT startbitpos)
11544 tree f;
11546 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11547 if (TREE_CODE (f) == FIELD_DECL)
11549 HOST_WIDE_INT bitpos = startbitpos;
11550 tree ftype = TREE_TYPE (f);
11551 machine_mode mode;
11552 if (ftype == error_mark_node)
11553 continue;
11554 mode = TYPE_MODE (ftype);
11556 if (DECL_SIZE (f) != 0
11557 && tree_fits_uhwi_p (bit_position (f)))
11558 bitpos += int_bit_position (f);
11560 /* ??? FIXME: else assume zero offset. */
11562 if (TREE_CODE (ftype) == RECORD_TYPE)
11563 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11564 else if (USE_FP_FOR_ARG_P (cum, mode))
11566 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11567 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11568 cum->fregno += n_fpregs;
11569 /* Single-precision floats present a special problem for
11570 us, because they are smaller than an 8-byte GPR, and so
11571 the structure-packing rules combined with the standard
11572 varargs behavior mean that we want to pack float/float
11573 and float/int combinations into a single register's
11574 space. This is complicated by the arg advance flushing,
11575 which works on arbitrarily large groups of int-type
11576 fields. */
11577 if (mode == SFmode)
11579 if (cum->floats_in_gpr == 1)
11581 /* Two floats in a word; count the word and reset
11582 the float count. */
11583 cum->words++;
11584 cum->floats_in_gpr = 0;
11586 else if (bitpos % 64 == 0)
11588 /* A float at the beginning of an 8-byte word;
11589 count it and put off adjusting cum->words until
11590 we see if an arg advance flush is going to do it
11591 for us. */
11592 cum->floats_in_gpr++;
11594 else
11596 /* The float is at the end of a word, preceded
11597 by integer fields, so the arg advance flush
11598 just above has already set cum->words and
11599 everything is taken care of. */
11602 else
11603 cum->words += n_fpregs;
11605 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11607 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11608 cum->vregno++;
11609 cum->words += 2;
11611 else if (cum->intoffset == -1)
11612 cum->intoffset = bitpos;
11616 /* Check for an item that needs to be considered specially under the Darwin
11617 64-bit ABI. These are record types where the mode is BLKmode or the structure is
11618 8 bytes in size. */
11619 static int
11620 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11622 return rs6000_darwin64_abi
11623 && ((mode == BLKmode
11624 && TREE_CODE (type) == RECORD_TYPE
11625 && int_size_in_bytes (type) > 0)
11626 || (type && TREE_CODE (type) == RECORD_TYPE
11627 && int_size_in_bytes (type) == 8)) ? 1 : 0;
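/* Examples (illustrative, not from the original source):
   struct { int a, b; } is a record of exactly 8 bytes and qualifies;
   struct { char c[11]; } is a BLKmode record and also qualifies; a
   plain int argument does not.  */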
11630 /* Update the data in CUM to advance over an argument
11631 of mode MODE and data type TYPE.
11632 (TYPE is null for libcalls where that information may not be available.)
11634 Note that for args passed by reference, function_arg will be called
11635 with MODE and TYPE set to that of the pointer to the arg, not the arg
11636 itself. */
11638 static void
11639 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11640 const_tree type, bool named, int depth)
11642 machine_mode elt_mode;
11643 int n_elts;
11645 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11647 /* Only tick off an argument if we're not recursing. */
11648 if (depth == 0)
11649 cum->nargs_prototype--;
11651 #ifdef HAVE_AS_GNU_ATTRIBUTE
11652 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11653 && cum->escapes)
11655 if (SCALAR_FLOAT_MODE_P (mode))
11657 rs6000_passes_float = true;
11658 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11659 && (FLOAT128_IBM_P (mode)
11660 || FLOAT128_IEEE_P (mode)
11661 || (type != NULL
11662 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11663 rs6000_passes_long_double = true;
11665 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11666 || (SPE_VECTOR_MODE (mode)
11667 && !cum->stdarg
11668 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11669 rs6000_passes_vector = true;
11671 #endif
11673 if (TARGET_ALTIVEC_ABI
11674 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11675 || (type && TREE_CODE (type) == VECTOR_TYPE
11676 && int_size_in_bytes (type) == 16)))
11678 bool stack = false;
11680 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11682 cum->vregno += n_elts;
11684 if (!TARGET_ALTIVEC)
11685 error ("cannot pass argument in vector register because"
11686 " altivec instructions are disabled, use -maltivec"
11687 " to enable them");
11689 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11690 even if it is going to be passed in a vector register.
11691 Darwin does the same for variable-argument functions. */
11692 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11693 && TARGET_64BIT)
11694 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11695 stack = true;
11697 else
11698 stack = true;
11700 if (stack)
11702 int align;
11704 /* Vector parameters must be 16-byte aligned. In 32-bit
11705 mode this means we need to take into account the offset
11706 to the parameter save area. In 64-bit mode, they just
11707 have to start on an even word, since the parameter save
11708 area is 16-byte aligned. */
11709 if (TARGET_32BIT)
11710 align = -(rs6000_parm_offset () + cum->words) & 3;
11711 else
11712 align = cum->words & 1;
11713 cum->words += align + rs6000_arg_size (mode, type);
11715 if (TARGET_DEBUG_ARG)
11717 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11718 cum->words, align);
11719 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11720 cum->nargs_prototype, cum->prototype,
11721 GET_MODE_NAME (mode));
11725 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11726 && !cum->stdarg
11727 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11728 cum->sysv_gregno++;
11730 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11732 int size = int_size_in_bytes (type);
11733 /* Variable sized types have size == -1 and are
11734 treated as if consisting entirely of ints.
11735 Pad to 16 byte boundary if needed. */
11736 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11737 && (cum->words % 2) != 0)
11738 cum->words++;
11739 /* For varargs, we can just go up by the size of the struct. */
11740 if (!named)
11741 cum->words += (size + 7) / 8;
11742 else
11744 /* It is tempting to say int register count just goes up by
11745 sizeof(type)/8, but this is wrong in a case such as
11746 { int; double; int; } [powerpc alignment]. We have to
11747 grovel through the fields for these too. */
11748 cum->intoffset = 0;
11749 cum->floats_in_gpr = 0;
11750 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11751 rs6000_darwin64_record_arg_advance_flush (cum,
11752 size * BITS_PER_UNIT, 1);
11754 if (TARGET_DEBUG_ARG)
11756 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11757 cum->words, TYPE_ALIGN (type), size);
11758 fprintf (stderr,
11759 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11760 cum->nargs_prototype, cum->prototype,
11761 GET_MODE_NAME (mode));
11764 else if (DEFAULT_ABI == ABI_V4)
11766 if (abi_v4_pass_in_fpr (mode))
11768 /* _Decimal128 must use an even/odd register pair. This assumes
11769 that the register number is odd when fregno is odd. */
11770 if (mode == TDmode && (cum->fregno % 2) == 1)
11771 cum->fregno++;
11773 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11774 <= FP_ARG_V4_MAX_REG)
11775 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11776 else
11778 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11779 if (mode == DFmode || FLOAT128_IBM_P (mode)
11780 || mode == DDmode || mode == TDmode)
11781 cum->words += cum->words & 1;
11782 cum->words += rs6000_arg_size (mode, type);
11785 else
11787 int n_words = rs6000_arg_size (mode, type);
11788 int gregno = cum->sysv_gregno;
11790 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11791 (r7,r8) or (r9,r10), as is any other 2-word item such
11792 as complex int, due to a historical mistake. */
11793 if (n_words == 2)
11794 gregno += (1 - gregno) & 1;
11796 /* Multi-reg args are not split between registers and stack. */
11797 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11799 /* Long long and SPE vectors are aligned on the stack.
11800 So are other 2 word items such as complex int due to
11801 a historical mistake. */
11802 if (n_words == 2)
11803 cum->words += cum->words & 1;
11804 cum->words += n_words;
11807 /* Note: we keep accumulating gregno even after we've started
11808 spilling to the stack; this tells expand_builtin_saveregs
11809 that spilling has begun. */
11810 cum->sysv_gregno = gregno + n_words;
11813 if (TARGET_DEBUG_ARG)
11815 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11816 cum->words, cum->fregno);
11817 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11818 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11819 fprintf (stderr, "mode = %4s, named = %d\n",
11820 GET_MODE_NAME (mode), named);
11823 else
11825 int n_words = rs6000_arg_size (mode, type);
11826 int start_words = cum->words;
11827 int align_words = rs6000_parm_start (mode, type, start_words);
11829 cum->words = align_words + n_words;
11831 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11833 /* _Decimal128 must be passed in an even/odd float register pair.
11834 This assumes that the register number is odd when fregno is
11835 odd. */
11836 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11837 cum->fregno++;
11838 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11841 if (TARGET_DEBUG_ARG)
11843 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11844 cum->words, cum->fregno);
11845 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11846 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11847 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11848 named, align_words - start_words, depth);
11853 static void
11854 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11855 const_tree type, bool named)
11857 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11861 static rtx
11862 spe_build_register_parallel (machine_mode mode, int gregno)
11864 rtx r1, r3, r5, r7;
11866 switch (mode)
11868 case DFmode:
11869 r1 = gen_rtx_REG (DImode, gregno);
11870 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11871 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11873 case DCmode:
11874 case TFmode:
11875 r1 = gen_rtx_REG (DImode, gregno);
11876 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11877 r3 = gen_rtx_REG (DImode, gregno + 2);
11878 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11879 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11881 case TCmode:
11882 r1 = gen_rtx_REG (DImode, gregno);
11883 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11884 r3 = gen_rtx_REG (DImode, gregno + 2);
11885 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11886 r5 = gen_rtx_REG (DImode, gregno + 4);
11887 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11888 r7 = gen_rtx_REG (DImode, gregno + 6);
11889 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11890 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11892 default:
11893 gcc_unreachable ();
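/* An illustrative sketch of the result (assuming gregno == 5, i.e. r5;
   not from the original source): for DCmode the function builds

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   where successive DImode pieces step by two register numbers, because
   for ABI purposes each 64-bit piece occupies a pair of 32-bit
   GPRs.  */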
11897 /* Determine where to put a SIMD argument on the SPE. */
11898 static rtx
11899 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11900 const_tree type)
11902 int gregno = cum->sysv_gregno;
11904 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11905 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11906 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11907 || mode == DCmode || mode == TCmode))
11909 int n_words = rs6000_arg_size (mode, type);
11911 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11912 if (mode == DFmode)
11913 gregno += (1 - gregno) & 1;
11915 /* Multi-reg args are not split between registers and stack. */
11916 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11917 return NULL_RTX;
11919 return spe_build_register_parallel (mode, gregno);
11921 if (cum->stdarg)
11923 int n_words = rs6000_arg_size (mode, type);
11925 /* SPE vectors are put in odd registers. */
11926 if (n_words == 2 && (gregno & 1) == 0)
11927 gregno += 1;
11929 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11931 rtx r1, r2;
11932 machine_mode m = SImode;
11934 r1 = gen_rtx_REG (m, gregno);
11935 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11936 r2 = gen_rtx_REG (m, gregno + 1);
11937 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11938 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11940 else
11941 return NULL_RTX;
11943 else
11945 if (gregno <= GP_ARG_MAX_REG)
11946 return gen_rtx_REG (mode, gregno);
11947 else
11948 return NULL_RTX;
11952 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11953 structure between cum->intoffset and bitpos to integer registers. */
11955 static void
11956 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11957 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11959 machine_mode mode;
11960 unsigned int regno;
11961 unsigned int startbit, endbit;
11962 int this_regno, intregs, intoffset;
11963 rtx reg;
11965 if (cum->intoffset == -1)
11966 return;
11968 intoffset = cum->intoffset;
11969 cum->intoffset = -1;
11971 /* If this is the trailing part of a word, try to only load that
11972 much into the register. Otherwise load the whole register. Note
11973 that in the latter case we may pick up unwanted bits. It's not a
11974 problem at the moment, but we may wish to revisit this. */
11976 if (intoffset % BITS_PER_WORD != 0)
11978 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11979 MODE_INT, 0);
11980 if (mode == BLKmode)
11982 /* We couldn't find an appropriate mode, which happens,
11983 e.g., in packed structs when there are 3 bytes to load.
11984 Move intoffset back to the beginning of the word in this
11985 case. */
11986 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11987 mode = word_mode;
11990 else
11991 mode = word_mode;
11993 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11994 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11995 intregs = (endbit - startbit) / BITS_PER_WORD;
11996 this_regno = cum->words + intoffset / BITS_PER_WORD;
11998 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11999 cum->use_stack = 1;
12001 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12002 if (intregs <= 0)
12003 return;
12005 intoffset /= BITS_PER_UNIT;
12008 regno = GP_ARG_MIN_REG + this_regno;
12009 reg = gen_rtx_REG (mode, regno);
12010 rvec[(*k)++] =
12011 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12013 this_regno += 1;
12014 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12015 mode = word_mode;
12016 intregs -= 1;
12018 while (intregs > 0);
12021 /* Recursive workhorse for the following. */
12023 static void
12024 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12025 HOST_WIDE_INT startbitpos, rtx rvec[],
12026 int *k)
12028 tree f;
12030 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12031 if (TREE_CODE (f) == FIELD_DECL)
12033 HOST_WIDE_INT bitpos = startbitpos;
12034 tree ftype = TREE_TYPE (f);
12035 machine_mode mode;
12036 if (ftype == error_mark_node)
12037 continue;
12038 mode = TYPE_MODE (ftype);
12040 if (DECL_SIZE (f) != 0
12041 && tree_fits_uhwi_p (bit_position (f)))
12042 bitpos += int_bit_position (f);
12044 /* ??? FIXME: else assume zero offset. */
12046 if (TREE_CODE (ftype) == RECORD_TYPE)
12047 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12048 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12050 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12051 #if 0
12052 switch (mode)
12054 case SCmode: mode = SFmode; break;
12055 case DCmode: mode = DFmode; break;
12056 case TCmode: mode = TFmode; break;
12057 default: break;
12059 #endif
12060 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12061 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12063 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12064 && (mode == TFmode || mode == TDmode));
12065 /* Long double or _Decimal128 split over regs and memory. */
12066 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12067 cum->use_stack=1;
12069 rvec[(*k)++]
12070 = gen_rtx_EXPR_LIST (VOIDmode,
12071 gen_rtx_REG (mode, cum->fregno++),
12072 GEN_INT (bitpos / BITS_PER_UNIT));
12073 if (FLOAT128_2REG_P (mode))
12074 cum->fregno++;
12076 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12078 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12079 rvec[(*k)++]
12080 = gen_rtx_EXPR_LIST (VOIDmode,
12081 gen_rtx_REG (mode, cum->vregno++),
12082 GEN_INT (bitpos / BITS_PER_UNIT));
12084 else if (cum->intoffset == -1)
12085 cum->intoffset = bitpos;
12089 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12090 the register(s) to be used for each field and subfield of a struct
12091 being passed by value, along with the offset of where the
12092 register's value may be found in the block. FP fields go in FP
12093 registers, vector fields go in vector registers, and everything
12094 else goes in int registers, packed as in memory.
12096 This code is also used for function return values. RETVAL indicates
12097 whether this is the case.
12099 Much of this is taken from the SPARC V9 port, which has a similar
12100 calling convention. */
12102 static rtx
12103 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12104 bool named, bool retval)
12106 rtx rvec[FIRST_PSEUDO_REGISTER];
12107 int k = 1, kbase = 1;
12108 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12109 /* This is a copy; modifications are not visible to our caller. */
12110 CUMULATIVE_ARGS copy_cum = *orig_cum;
12111 CUMULATIVE_ARGS *cum = &copy_cum;
12113 /* Pad to 16 byte boundary if needed. */
12114 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12115 && (cum->words % 2) != 0)
12116 cum->words++;
12118 cum->intoffset = 0;
12119 cum->use_stack = 0;
12120 cum->named = named;
12122 /* Put entries into rvec[] for individual FP and vector fields, and
12123 for the chunks of memory that go in int regs. Note we start at
12124 element 1; 0 is reserved for an indication of using memory, and
12125 may or may not be filled in below. */
12126 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12127 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12129 /* If any part of the struct went on the stack put all of it there.
12130 This hack is because the generic code for
12131 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12132 parts of the struct are not at the beginning. */
12133 if (cum->use_stack)
12135 if (retval)
12136 return NULL_RTX; /* doesn't go in registers at all */
12137 kbase = 0;
12138 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12140 if (k > 1 || cum->use_stack)
12141 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12142 else
12143 return NULL_RTX;
12146 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12148 static rtx
12149 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12150 int align_words)
12152 int n_units;
12153 int i, k;
12154 rtx rvec[GP_ARG_NUM_REG + 1];
12156 if (align_words >= GP_ARG_NUM_REG)
12157 return NULL_RTX;
12159 n_units = rs6000_arg_size (mode, type);
12161 /* Optimize the simple case where the arg fits in one gpr, except in
12162 the case of BLKmode due to assign_parms assuming that registers are
12163 BITS_PER_WORD wide. */
12164 if (n_units == 0
12165 || (n_units == 1 && mode != BLKmode))
12166 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12168 k = 0;
12169 if (align_words + n_units > GP_ARG_NUM_REG)
12170 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12171 using a magic NULL_RTX component.
12172 This is not strictly correct. Only some of the arg belongs in
12173 memory, not all of it. However, the normal scheme using
12174 function_arg_partial_nregs can result in unusual subregs, e.g.
12175 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12176 store the whole arg to memory is often more efficient than code
12177 to store pieces, and we know that space is available in the right
12178 place for the whole arg. */
12179 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12181 i = 0;
12184 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12185 rtx off = GEN_INT (i++ * 4);
12186 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12188 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12190 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
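/* A worked example (illustrative, not from the original source): a
   DFmode argument with ALIGN_WORDS == 7 under -m32 -mpowerpc64 needs
   two words but only r10 remains, so the result is

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   i.e. the first word of the value is passed in r10, and the NULL_RTX
   element flags that the argument also goes to memory.  */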
12193 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12194 but must also be copied into the parameter save area starting at
12195 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12196 to the GPRs and/or memory. Return the number of elements used. */
12198 static int
12199 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12200 int align_words, rtx *rvec)
12202 int k = 0;
12204 if (align_words < GP_ARG_NUM_REG)
12206 int n_words = rs6000_arg_size (mode, type);
12208 if (align_words + n_words > GP_ARG_NUM_REG
12209 || mode == BLKmode
12210 || (TARGET_32BIT && TARGET_POWERPC64))
12212 /* If this is partially on the stack, then we only
12213 include the portion actually in registers here. */
12214 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12215 int i = 0;
12217 if (align_words + n_words > GP_ARG_NUM_REG)
12219 /* Not all of the arg fits in gprs. Say that it goes in memory
12220 too, using a magic NULL_RTX component. Also see comment in
12221 rs6000_mixed_function_arg for why the normal
12222 function_arg_partial_nregs scheme doesn't work in this case. */
12223 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12228 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12229 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12230 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12232 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12234 else
12236 /* The whole arg fits in gprs. */
12237 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12238 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12241 else
12243 /* It's entirely in memory. */
12244 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12247 return k;
12250 /* RVEC is a vector of K components of an argument of mode MODE.
12251 Construct the final function_arg return value from it. */
12253 static rtx
12254 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12256 gcc_assert (k >= 1);
12258 /* Avoid returning a PARALLEL in the trivial cases. */
12259 if (k == 1)
12261 if (XEXP (rvec[0], 0) == NULL_RTX)
12262 return NULL_RTX;
12264 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12265 return XEXP (rvec[0], 0);
12268 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12271 /* Determine where to put an argument to a function.
12272 Value is zero to push the argument on the stack,
12273 or a hard register in which to store the argument.
12275 MODE is the argument's machine mode.
12276 TYPE is the data type of the argument (as a tree).
12277 This is null for libcalls where that information may
12278 not be available.
12279 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12280 the preceding args and about the function being called. It is
12281 not modified in this routine.
12282 NAMED is nonzero if this argument is a named parameter
12283 (otherwise it is an extra parameter matching an ellipsis).
12285 On RS/6000 the first eight words of non-FP arguments are normally in registers
12286 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12287 Under V.4, the first 8 FP args are in registers.
12289 If this is floating-point and no prototype is specified, we use
12290 both an FP and integer register (or possibly FP reg and stack). Library
12291 functions (when CALL_LIBCALL is set) always have the proper types for args,
12292 so we can pass the FP value just in one register. emit_library_call
12293 doesn't support PARALLEL anyway.
12295 Note that for args passed by reference, function_arg will be called
12296 with MODE and TYPE set to that of the pointer to the arg, not the arg
12297 itself. */
12299 static rtx
12300 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12301 const_tree type, bool named)
12303 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12304 enum rs6000_abi abi = DEFAULT_ABI;
12305 machine_mode elt_mode;
12306 int n_elts;
12308 /* Return a marker to indicate whether the CR1 bit that V.4 uses to
12309 say fp args were passed in registers needs to be set or cleared.
12310 Assume that we don't need the marker for software floating point,
12311 or compiler generated library calls. */
12312 if (mode == VOIDmode)
12314 if (abi == ABI_V4
12315 && (cum->call_cookie & CALL_LIBCALL) == 0
12316 && (cum->stdarg
12317 || (cum->nargs_prototype < 0
12318 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12320 /* For the SPE, we need to crxor CR6 always. */
12321 if (TARGET_SPE_ABI)
12322 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12323 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12324 return GEN_INT (cum->call_cookie
12325 | ((cum->fregno == FP_ARG_MIN_REG)
12326 ? CALL_V4_SET_FP_ARGS
12327 : CALL_V4_CLEAR_FP_ARGS));
12330 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12333 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12335 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12337 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12338 if (rslt != NULL_RTX)
12339 return rslt;
12340 /* Else fall through to usual handling. */
12343 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12345 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12346 rtx r, off;
12347 int i, k = 0;
12349 /* Do we also need to pass this argument in the parameter save area?
12350 Library support functions for IEEE 128-bit are assumed to not need the
12351 value passed both in GPRs and in vector registers. */
12352 if (TARGET_64BIT && !cum->prototype
12353 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12355 int align_words = ROUND_UP (cum->words, 2);
12356 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12359 /* Describe where this argument goes in the vector registers. */
12360 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12362 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12363 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12364 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12367 return rs6000_finish_function_arg (mode, rvec, k);
12369 else if (TARGET_ALTIVEC_ABI
12370 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12371 || (type && TREE_CODE (type) == VECTOR_TYPE
12372 && int_size_in_bytes (type) == 16)))
12374 if (named || abi == ABI_V4)
12375 return NULL_RTX;
12376 else
12378 /* Vector parameters to varargs functions under AIX or Darwin
12379 get passed in memory and possibly also in GPRs. */
12380 int align, align_words, n_words;
12381 machine_mode part_mode;
12383 /* Vector parameters must be 16-byte aligned. In 32-bit
12384 mode this means we need to take into account the offset
12385 to the parameter save area. In 64-bit mode, they just
12386 have to start on an even word, since the parameter save
12387 area is 16-byte aligned. */
12388 if (TARGET_32BIT)
12389 align = -(rs6000_parm_offset () + cum->words) & 3;
12390 else
12391 align = cum->words & 1;
12392 align_words = cum->words + align;
12394 /* Out of registers? Memory, then. */
12395 if (align_words >= GP_ARG_NUM_REG)
12396 return NULL_RTX;
12398 if (TARGET_32BIT && TARGET_POWERPC64)
12399 return rs6000_mixed_function_arg (mode, type, align_words);
12401 /* The vector value goes in GPRs. Only the part of the
12402 value in GPRs is reported here. */
12403 part_mode = mode;
12404 n_words = rs6000_arg_size (mode, type);
12405 if (align_words + n_words > GP_ARG_NUM_REG)
12406 /* Fortunately, there are only two possibilities: the value
12407 is either wholly in GPRs or half in GPRs and half not. */
12408 part_mode = DImode;
12410 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12413 else if (TARGET_SPE_ABI && TARGET_SPE
12414 && (SPE_VECTOR_MODE (mode)
12415 || (TARGET_E500_DOUBLE && (mode == DFmode
12416 || mode == DCmode
12417 || mode == TFmode
12418 || mode == TCmode))))
12419 return rs6000_spe_function_arg (cum, mode, type);
12421 else if (abi == ABI_V4)
12423 if (abi_v4_pass_in_fpr (mode))
12425 /* _Decimal128 must use an even/odd register pair. This assumes
12426 that the register number is odd when fregno is odd. */
12427 if (mode == TDmode && (cum->fregno % 2) == 1)
12428 cum->fregno++;
12430 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12431 <= FP_ARG_V4_MAX_REG)
12432 return gen_rtx_REG (mode, cum->fregno);
12433 else
12434 return NULL_RTX;
12436 else
12438 int n_words = rs6000_arg_size (mode, type);
12439 int gregno = cum->sysv_gregno;
12441 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12442 (r7,r8) or (r9,r10), as is any other 2-word item such
12443 as complex int, due to a historical mistake. */
12444 if (n_words == 2)
12445 gregno += (1 - gregno) & 1;
12447 /* Multi-reg args are not split between registers and stack. */
12448 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12449 return NULL_RTX;
12451 if (TARGET_32BIT && TARGET_POWERPC64)
12452 return rs6000_mixed_function_arg (mode, type,
12453 gregno - GP_ARG_MIN_REG);
12454 return gen_rtx_REG (mode, gregno);
12457 else
12459 int align_words = rs6000_parm_start (mode, type, cum->words);
12461 /* _Decimal128 must be passed in an even/odd float register pair.
12462 This assumes that the register number is odd when fregno is odd. */
12463 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12464 cum->fregno++;
12466 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12468 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12469 rtx r, off;
12470 int i, k = 0;
12471 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12472 int fpr_words;
12474 /* Do we also need to pass this argument in the parameter
12475 save area? */
12476 if (type && (cum->nargs_prototype <= 0
12477 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12478 && TARGET_XL_COMPAT
12479 && align_words >= GP_ARG_NUM_REG)))
12480 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12482 /* Describe where this argument goes in the fprs. */
12483 for (i = 0; i < n_elts
12484 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12486 /* Check if the argument is split over registers and memory.
12487 This can only ever happen for long double or _Decimal128;
12488 complex types are handled via split_complex_arg. */
12489 machine_mode fmode = elt_mode;
12490 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12492 gcc_assert (FLOAT128_2REG_P (fmode));
12493 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12496 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12497 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12498 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12501 /* If there were not enough FPRs to hold the argument, the rest
12502 usually goes into memory. However, if the current position
12503 is still within the register parameter area, a portion may
12504 actually have to go into GPRs.
12506 Note that it may happen that the portion of the argument
12507 passed in the first "half" of the first GPR was already
12508 passed in the last FPR as well.
12510 For unnamed arguments, we already set up GPRs to cover the
12511 whole argument in rs6000_psave_function_arg, so there is
12512 nothing further to do at this point. */
12513 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12514 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12515 && cum->nargs_prototype > 0)
12517 static bool warned;
12519 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12520 int n_words = rs6000_arg_size (mode, type);
12522 align_words += fpr_words;
12523 n_words -= fpr_words;
12527 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12528 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12529 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12531 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12533 if (!warned && warn_psabi)
12535 warned = true;
12536 inform (input_location,
12537 "the ABI of passing homogeneous float aggregates"
12538 " has changed in GCC 5");
12542 return rs6000_finish_function_arg (mode, rvec, k);
12544 else if (align_words < GP_ARG_NUM_REG)
12546 if (TARGET_32BIT && TARGET_POWERPC64)
12547 return rs6000_mixed_function_arg (mode, type, align_words);
12549 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12551 else
12552 return NULL_RTX;
12556 /* For an arg passed partly in registers and partly in memory, this is
12557 the number of bytes passed in registers. For args passed entirely in
12558 registers or entirely in memory, zero. When an arg is described by a
12559 PARALLEL, perhaps using more than one register type, this function
12560 returns the number of bytes used by the first element of the PARALLEL. */
12562 static int
12563 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12564 tree type, bool named)
12566 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12567 bool passed_in_gprs = true;
12568 int ret = 0;
12569 int align_words;
12570 machine_mode elt_mode;
12571 int n_elts;
12573 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12575 if (DEFAULT_ABI == ABI_V4)
12576 return 0;
12578 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12580 /* If we are passing this arg in the fixed parameter save area (gprs or
12581 memory) as well as VRs, we do not use the partial bytes mechanism;
12582 instead, rs6000_function_arg will return a PARALLEL including a memory
12583 element as necessary. Library support functions for IEEE 128-bit are
12584 assumed to not need the value passed both in GPRs and in vector
12585 registers. */
12586 if (TARGET_64BIT && !cum->prototype
12587 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12588 return 0;
12590 /* Otherwise, we pass in VRs only. Check for partial copies. */
12591 passed_in_gprs = false;
12592 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12593 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12596 /* In this complicated case we just disable the partial_nregs code. */
12597 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12598 return 0;
12600 align_words = rs6000_parm_start (mode, type, cum->words);
12602 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12604 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12606 /* If we are passing this arg in the fixed parameter save area
12607 (gprs or memory) as well as FPRs, we do not use the partial
12608 bytes mechanism; instead, rs6000_function_arg will return a
12609 PARALLEL including a memory element as necessary. */
12610 if (type
12611 && (cum->nargs_prototype <= 0
12612 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12613 && TARGET_XL_COMPAT
12614 && align_words >= GP_ARG_NUM_REG)))
12615 return 0;
12617 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12618 passed_in_gprs = false;
12619 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12621 /* Compute number of bytes / words passed in FPRs. If there
12622 is still space available in the register parameter area
12623 *after* that amount, a part of the argument will be passed
12624 in GPRs. In that case, the total amount passed in any
12625 registers is equal to the amount that would have been passed
12626 in GPRs if everything were passed there, so we fall back to
12627 the GPR code below to compute the appropriate value. */
12628 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12629 * MIN (8, GET_MODE_SIZE (elt_mode)));
12630 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12632 if (align_words + fpr_words < GP_ARG_NUM_REG)
12633 passed_in_gprs = true;
12634 else
12635 ret = fpr;
12639 if (passed_in_gprs
12640 && align_words < GP_ARG_NUM_REG
12641 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12642 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12644 if (ret != 0 && TARGET_DEBUG_ARG)
12645 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12647 return ret;
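/* A worked example (illustrative, not from the original source): under
   the 32-bit AIX ABI, an 8-byte DImode argument starting at
   ALIGN_WORDS == 7 has only r10 left, so the function returns
   (8 - 7) * 4 == 4: four bytes are passed in registers and the
   remaining four go on the stack.  */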
12650 /* A C expression that indicates when an argument must be passed by
12651 reference. If nonzero for an argument, a copy of that argument is
12652 made in memory and a pointer to the argument is passed instead of
12653 the argument itself. The pointer is passed in whatever way is
12654 appropriate for passing a pointer to that type.
12656 Under V.4, aggregates and long double are passed by reference.
12658 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12659 reference unless the AltiVec vector extension ABI is in force.
12661 As an extension to all ABIs, variable sized types are passed by
12662 reference. */
12664 static bool
12665 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12666 machine_mode mode, const_tree type,
12667 bool named ATTRIBUTE_UNUSED)
12669 if (!type)
12670 return 0;
12672 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12673 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12675 if (TARGET_DEBUG_ARG)
12676 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12677 return 1;
12680 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12682 if (TARGET_DEBUG_ARG)
12683 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12684 return 1;
12687 if (int_size_in_bytes (type) < 0)
12689 if (TARGET_DEBUG_ARG)
12690 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12691 return 1;
12694 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12695 modes only exist for GCC vector types if -maltivec. */
12696 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12698 if (TARGET_DEBUG_ARG)
12699 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12700 return 1;
12703 /* Pass synthetic vectors in memory. */
12704 if (TREE_CODE (type) == VECTOR_TYPE
12705 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12707 static bool warned_for_pass_big_vectors = false;
12708 if (TARGET_DEBUG_ARG)
12709 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12710 if (!warned_for_pass_big_vectors)
12712 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12713 "non-standard ABI extension with no compatibility guarantee");
12714 warned_for_pass_big_vectors = true;
12716 return 1;
12719 return 0;
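/* Examples (illustrative, not from the original source): under ABI_V4
   even struct { char c; } is passed by reference; a variable-size type
   is passed by reference under every ABI; and a 32-byte GCC vector
   type is passed by reference with the -Wpsabi warning above, since it
   exceeds the 16-byte AltiVec limit.  */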
12722 /* Process a parameter of type TYPE after ARGS_SO_FAR parameters have
12723 already been processed. Return true if the parameter must be passed
12724 (fully or partially) on the stack. */
12726 static bool
12727 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12729 machine_mode mode;
12730 int unsignedp;
12731 rtx entry_parm;
12733 /* Catch errors. */
12734 if (type == NULL || type == error_mark_node)
12735 return true;
12737 /* Handle types with no storage requirement. */
12738 if (TYPE_MODE (type) == VOIDmode)
12739 return false;
12741 /* Handle complex types. */
12742 if (TREE_CODE (type) == COMPLEX_TYPE)
12743 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12744 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12746 /* Handle transparent aggregates. */
12747 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12748 && TYPE_TRANSPARENT_AGGR (type))
12749 type = TREE_TYPE (first_field (type));
12751 /* See if this arg was passed by invisible reference. */
12752 if (pass_by_reference (get_cumulative_args (args_so_far),
12753 TYPE_MODE (type), type, true))
12754 type = build_pointer_type (type);
12756 /* Find mode as it is passed by the ABI. */
12757 unsignedp = TYPE_UNSIGNED (type);
12758 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12760 /* If we must pass in stack, we need a stack. */
12761 if (rs6000_must_pass_in_stack (mode, type))
12762 return true;
12764 /* If there is no incoming register, we need a stack. */
12765 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12766 if (entry_parm == NULL)
12767 return true;
12769 /* Likewise if we need to pass both in registers and on the stack. */
12770 if (GET_CODE (entry_parm) == PARALLEL
12771 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12772 return true;
12774 /* Also true if we're partially in registers and partially not. */
12775 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12776 return true;
12778 /* Update info on where next arg arrives in registers. */
12779 rs6000_function_arg_advance (args_so_far, mode, type, true);
12780 return false;
12783 /* Return true if FUN has no prototype, has a variable argument
12784 list, or passes any parameter in memory. */
12786 static bool
12787 rs6000_function_parms_need_stack (tree fun, bool incoming)
12789 tree fntype, result;
12790 CUMULATIVE_ARGS args_so_far_v;
12791 cumulative_args_t args_so_far;
12793 if (!fun)
12794 /* Must be a libcall, all of which only use reg parms. */
12795 return false;
12797 fntype = fun;
12798 if (!TYPE_P (fun))
12799 fntype = TREE_TYPE (fun);
12801 /* Varargs functions need the parameter save area. */
12802 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12803 return true;
12805 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12806 args_so_far = pack_cumulative_args (&args_so_far_v);
12808 /* When incoming, we will have been passed the function decl.
12809 It is necessary to use the decl to handle K&R style functions,
12810 where TYPE_ARG_TYPES may not be available. */
12811 if (incoming)
12813 gcc_assert (DECL_P (fun));
12814 result = DECL_RESULT (fun);
12816 else
12817 result = TREE_TYPE (fntype);
12819 if (result && aggregate_value_p (result, fntype))
12821 if (!TYPE_P (result))
12822 result = TREE_TYPE (result);
12823 result = build_pointer_type (result);
12824 rs6000_parm_needs_stack (args_so_far, result);
12827 if (incoming)
12829 tree parm;
12831 for (parm = DECL_ARGUMENTS (fun);
12832 parm && parm != void_list_node;
12833 parm = TREE_CHAIN (parm))
12834 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12835 return true;
12837 else
12839 function_args_iterator args_iter;
12840 tree arg_type;
12842 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12843 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12844 return true;
12847 return false;
12850 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12851 usually a constant depending on the ABI. However, in the ELFv2 ABI
12852 the register parameter area is optional when calling a function that
12853 has a prototype in scope, has no variable argument list, and passes
12854 all parameters in registers. */
12856 static int
12857 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12859 int reg_parm_stack_space;
12861 switch (DEFAULT_ABI)
12863 default:
12864 reg_parm_stack_space = 0;
12865 break;
12867 case ABI_AIX:
12868 case ABI_DARWIN:
12869 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12870 break;
12872 case ABI_ELFv2:
12873 /* ??? Recomputing this every time is a bit expensive. Is there
12874 a place to cache this information? */
12875 if (rs6000_function_parms_need_stack (fun, incoming))
12876 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12877 else
12878 reg_parm_stack_space = 0;
12879 break;
12882 return reg_parm_stack_space;
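/* Editorial illustration (not part of the original source): under
   ELFv2, a call to a hypothetical

	int add (int a, int b);

   whose prototype is in scope, which is not stdarg and passes both
   arguments in GPRs, gets reg_parm_stack_space = 0; a call to an
   unprototyped or variadic function instead reserves the full 64
   bytes on a 64-bit target.  */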
12885 static void
12886 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12888 int i;
12889 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12891 if (nregs == 0)
12892 return;
12894 for (i = 0; i < nregs; i++)
12896 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12897 if (reload_completed)
12899 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12900 tem = NULL_RTX;
12901 else
12902 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12903 i * GET_MODE_SIZE (reg_mode));
12905 else
12906 tem = replace_equiv_address (tem, XEXP (tem, 0));
12908 gcc_assert (tem);
12910 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12914 /* Perform any actions needed for a function that is receiving a
12915 variable number of arguments.
12917 CUM is as above.
12919 MODE and TYPE are the mode and type of the current parameter.
12921 PRETEND_SIZE is a variable that should be set to the amount of stack
12922 that must be pushed by the prolog to pretend that our caller pushed
12923 it.
12925 Normally, this macro will push all remaining incoming registers on the
12926 stack and set PRETEND_SIZE to the length of the registers pushed. */
12928 static void
12929 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12930 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12931 int no_rtl)
12933 CUMULATIVE_ARGS next_cum;
12934 int reg_size = TARGET_32BIT ? 4 : 8;
12935 rtx save_area = NULL_RTX, mem;
12936 int first_reg_offset;
12937 alias_set_type set;
12939 /* Skip the last named argument. */
12940 next_cum = *get_cumulative_args (cum);
12941 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12943 if (DEFAULT_ABI == ABI_V4)
12945 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12947 if (! no_rtl)
12949 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12950 HOST_WIDE_INT offset = 0;
12952 /* Try to optimize the size of the varargs save area.
12953 The ABI requires that ap.reg_save_area is doubleword
12954 aligned, but we don't need to allocate space for all
12955 the bytes, only for those into which we will actually
12956 save anything. */
12957 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12958 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12959 if (TARGET_HARD_FLOAT && TARGET_FPRS
12960 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12961 && cfun->va_list_fpr_size)
12963 if (gpr_reg_num)
12964 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12965 * UNITS_PER_FP_WORD;
12966 if (cfun->va_list_fpr_size
12967 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12968 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12969 else
12970 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12971 * UNITS_PER_FP_WORD;
12973 if (gpr_reg_num)
12975 offset = -((first_reg_offset * reg_size) & ~7);
12976 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12978 gpr_reg_num = cfun->va_list_gpr_size;
12979 if (reg_size == 4 && (first_reg_offset & 1))
12980 gpr_reg_num++;
12982 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12984 else if (fpr_size)
12985 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12986 * UNITS_PER_FP_WORD
12987 - (int) (GP_ARG_NUM_REG * reg_size);
12989 if (gpr_size + fpr_size)
12991 rtx reg_save_area
12992 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12993 gcc_assert (GET_CODE (reg_save_area) == MEM);
12994 reg_save_area = XEXP (reg_save_area, 0);
12995 if (GET_CODE (reg_save_area) == PLUS)
12997 gcc_assert (XEXP (reg_save_area, 0)
12998 == virtual_stack_vars_rtx);
12999 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13000 offset += INTVAL (XEXP (reg_save_area, 1));
13002 else
13003 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13006 cfun->machine->varargs_save_offset = offset;
13007 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13010 else
13012 first_reg_offset = next_cum.words;
13013 save_area = crtl->args.internal_arg_pointer;
13015 if (targetm.calls.must_pass_in_stack (mode, type))
13016 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13019 set = get_varargs_alias_set ();
13020 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13021 && cfun->va_list_gpr_size)
13023 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13025 if (va_list_gpr_counter_field)
13026 /* V4 va_list_gpr_size counts number of registers needed. */
13027 n_gpr = cfun->va_list_gpr_size;
13028 else
13029 /* char * va_list instead counts number of bytes needed. */
13030 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13032 if (nregs > n_gpr)
13033 nregs = n_gpr;
13035 mem = gen_rtx_MEM (BLKmode,
13036 plus_constant (Pmode, save_area,
13037 first_reg_offset * reg_size));
13038 MEM_NOTRAP_P (mem) = 1;
13039 set_mem_alias_set (mem, set);
13040 set_mem_align (mem, BITS_PER_WORD);
13042 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13043 nregs);
13046 /* Save FP registers if needed. */
13047 if (DEFAULT_ABI == ABI_V4
13048 && TARGET_HARD_FLOAT && TARGET_FPRS
13049 && ! no_rtl
13050 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13051 && cfun->va_list_fpr_size)
13053 int fregno = next_cum.fregno, nregs;
13054 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13055 rtx lab = gen_label_rtx ();
13056 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13057 * UNITS_PER_FP_WORD);
13059 emit_jump_insn
13060 (gen_rtx_SET (pc_rtx,
13061 gen_rtx_IF_THEN_ELSE (VOIDmode,
13062 gen_rtx_NE (VOIDmode, cr1,
13063 const0_rtx),
13064 gen_rtx_LABEL_REF (VOIDmode, lab),
13065 pc_rtx)));
13067 for (nregs = 0;
13068 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13069 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13071 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13072 ? DFmode : SFmode,
13073 plus_constant (Pmode, save_area, off));
13074 MEM_NOTRAP_P (mem) = 1;
13075 set_mem_alias_set (mem, set);
13076 set_mem_align (mem, GET_MODE_ALIGNMENT (
13077 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13078 ? DFmode : SFmode));
13079 emit_move_insn (mem, gen_rtx_REG (
13080 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13081 ? DFmode : SFmode, fregno));
13084 emit_label (lab);
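/* Editorial sketch (not part of the original source) of the V.4
   register save area laid out above, in the full case:

	reg_save_area:  r3 r4 r5 r6 r7 r8 r9 r10    8 * 4 bytes
	                f1 f2 f3 f4 f5 f6 f7 f8     8 * 8 bytes

   OFFSET is chosen so that only the slots va_arg can still reach,
   given va_list_gpr_size and va_list_fpr_size, are actually
   allocated.  */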
13088 /* Create the va_list data type. */
13090 static tree
13091 rs6000_build_builtin_va_list (void)
13093 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13095 /* For AIX, prefer 'char *' because that's what the system
13096 header files like. */
13097 if (DEFAULT_ABI != ABI_V4)
13098 return build_pointer_type (char_type_node);
13100 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13101 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13102 get_identifier ("__va_list_tag"), record);
13104 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13105 unsigned_char_type_node);
13106 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13107 unsigned_char_type_node);
13108 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13109 every user file. */
13110 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13111 get_identifier ("reserved"), short_unsigned_type_node);
13112 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13113 get_identifier ("overflow_arg_area"),
13114 ptr_type_node);
13115 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13116 get_identifier ("reg_save_area"),
13117 ptr_type_node);
13119 va_list_gpr_counter_field = f_gpr;
13120 va_list_fpr_counter_field = f_fpr;
13122 DECL_FIELD_CONTEXT (f_gpr) = record;
13123 DECL_FIELD_CONTEXT (f_fpr) = record;
13124 DECL_FIELD_CONTEXT (f_res) = record;
13125 DECL_FIELD_CONTEXT (f_ovf) = record;
13126 DECL_FIELD_CONTEXT (f_sav) = record;
13128 TYPE_STUB_DECL (record) = type_decl;
13129 TYPE_NAME (record) = type_decl;
13130 TYPE_FIELDS (record) = f_gpr;
13131 DECL_CHAIN (f_gpr) = f_fpr;
13132 DECL_CHAIN (f_fpr) = f_res;
13133 DECL_CHAIN (f_res) = f_ovf;
13134 DECL_CHAIN (f_ovf) = f_sav;
13136 layout_type (record);
13138 /* The correct type is an array type of one element. */
13139 return build_array_type (record, build_index_type (size_zero_node));
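/* Editorial sketch (not part of the original source; field comments
   and the typedef name are illustrative): the record built above
   corresponds to the V.4 ABI's user-visible declaration

	typedef struct __va_list_tag {
	  unsigned char gpr;             next GPR to use, 0..8
	  unsigned char fpr;             next FPR to use, 0..8
	  unsigned short reserved;       named padding
	  void *overflow_arg_area;       args that spilled to the stack
	  void *reg_save_area;           block saved by the prologue
	} va_list[1];

   Returning an array of one element makes va_list decay to a pointer
   when passed between functions, as the ABI expects.  */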
13142 /* Implement va_start. */
13144 static void
13145 rs6000_va_start (tree valist, rtx nextarg)
13147 HOST_WIDE_INT words, n_gpr, n_fpr;
13148 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13149 tree gpr, fpr, ovf, sav, t;
13151 /* Only SVR4 needs something special. */
13152 if (DEFAULT_ABI != ABI_V4)
13154 std_expand_builtin_va_start (valist, nextarg);
13155 return;
13158 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13159 f_fpr = DECL_CHAIN (f_gpr);
13160 f_res = DECL_CHAIN (f_fpr);
13161 f_ovf = DECL_CHAIN (f_res);
13162 f_sav = DECL_CHAIN (f_ovf);
13164 valist = build_simple_mem_ref (valist);
13165 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13166 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13167 f_fpr, NULL_TREE);
13168 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13169 f_ovf, NULL_TREE);
13170 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13171 f_sav, NULL_TREE);
13173 /* Count number of gp and fp argument registers used. */
13174 words = crtl->args.info.words;
13175 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13176 GP_ARG_NUM_REG);
13177 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13178 FP_ARG_NUM_REG);
13180 if (TARGET_DEBUG_ARG)
13181 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13182 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13183 words, n_gpr, n_fpr);
13185 if (cfun->va_list_gpr_size)
13187 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13188 build_int_cst (NULL_TREE, n_gpr));
13189 TREE_SIDE_EFFECTS (t) = 1;
13190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13193 if (cfun->va_list_fpr_size)
13195 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13196 build_int_cst (NULL_TREE, n_fpr));
13197 TREE_SIDE_EFFECTS (t) = 1;
13198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13200 #ifdef HAVE_AS_GNU_ATTRIBUTE
13201 if (call_ABI_of_interest (cfun->decl))
13202 rs6000_passes_float = true;
13203 #endif
13206 /* Find the overflow area. */
13207 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13208 if (words != 0)
13209 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13210 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13211 TREE_SIDE_EFFECTS (t) = 1;
13212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13214 /* If there were no va_arg invocations, don't set up the register
13215 save area. */
13216 if (!cfun->va_list_gpr_size
13217 && !cfun->va_list_fpr_size
13218 && n_gpr < GP_ARG_NUM_REG
13219 && n_fpr < FP_ARG_V4_MAX_REG)
13220 return;
13222 /* Find the register save area. */
13223 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13224 if (cfun->machine->varargs_save_offset)
13225 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13226 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13227 TREE_SIDE_EFFECTS (t) = 1;
13228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
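/* Editorial example (not part of the original source): in a
   hypothetical V.4 function

	void logmsg (const char *fmt, ...);

   FMT consumes one GPR, so va_start stores gpr = 1 and fpr = 0,
   points overflow_arg_area at the caller's parameter words on the
   stack, and, when any va_arg uses follow, points reg_save_area at
   the block spilled by setup_incoming_varargs above.  */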
13231 /* Implement va_arg. */
13233 static tree
13234 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13235 gimple_seq *post_p)
13237 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13238 tree gpr, fpr, ovf, sav, reg, t, u;
13239 int size, rsize, n_reg, sav_ofs, sav_scale;
13240 tree lab_false, lab_over, addr;
13241 int align;
13242 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13243 int regalign = 0;
13244 gimple *stmt;
13246 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13248 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13249 return build_va_arg_indirect_ref (t);
13252 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13253 earlier version of gcc, with the property that it always applied alignment
13254 adjustments to the va-args (even for zero-sized types). The cheapest way
13255 to deal with this is to replicate the effect of the part of
13256 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13257 of relevance.
13258 We don't need to check for pass-by-reference because of the test above.
13259 We can return a simplified answer, since we know there's no offset to add. */
13261 if (((TARGET_MACHO
13262 && rs6000_darwin64_abi)
13263 || DEFAULT_ABI == ABI_ELFv2
13264 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13265 && integer_zerop (TYPE_SIZE (type)))
13267 unsigned HOST_WIDE_INT align, boundary;
13268 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13269 align = PARM_BOUNDARY / BITS_PER_UNIT;
13270 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13271 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13272 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13273 boundary /= BITS_PER_UNIT;
13274 if (boundary > align)
13276 tree t;
13277 /* This updates arg ptr by the amount that would be necessary
13278 to align the zero-sized (but not zero-alignment) item. */
13279 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13280 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13281 gimplify_and_add (t, pre_p);
13283 t = fold_convert (sizetype, valist_tmp);
13284 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13285 fold_convert (TREE_TYPE (valist),
13286 fold_build2 (BIT_AND_EXPR, sizetype, t,
13287 size_int (-boundary))));
13288 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13289 gimplify_and_add (t, pre_p);
13291 /* Since it is zero-sized there's no increment for the item itself. */
13292 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13293 return build_va_arg_indirect_ref (valist_tmp);
13296 if (DEFAULT_ABI != ABI_V4)
13298 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13300 tree elem_type = TREE_TYPE (type);
13301 machine_mode elem_mode = TYPE_MODE (elem_type);
13302 int elem_size = GET_MODE_SIZE (elem_mode);
13304 if (elem_size < UNITS_PER_WORD)
13306 tree real_part, imag_part;
13307 gimple_seq post = NULL;
13309 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13310 &post);
13311 /* Copy the value into a temporary, lest the formal temporary
13312 be reused out from under us. */
13313 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13314 gimple_seq_add_seq (pre_p, post);
13316 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13317 post_p);
13319 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13323 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13326 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13327 f_fpr = DECL_CHAIN (f_gpr);
13328 f_res = DECL_CHAIN (f_fpr);
13329 f_ovf = DECL_CHAIN (f_res);
13330 f_sav = DECL_CHAIN (f_ovf);
13332 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13333 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13334 f_fpr, NULL_TREE);
13335 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13336 f_ovf, NULL_TREE);
13337 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13338 f_sav, NULL_TREE);
13340 size = int_size_in_bytes (type);
13341 rsize = (size + 3) / 4;
13342 align = 1;
13344 machine_mode mode = TYPE_MODE (type);
13345 if (abi_v4_pass_in_fpr (mode))
13347 /* FP args go in FP registers, if present. */
13348 reg = fpr;
13349 n_reg = (size + 7) / 8;
13350 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13351 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13352 if (mode != SFmode && mode != SDmode)
13353 align = 8;
13355 else
13357 /* Otherwise into GP registers. */
13358 reg = gpr;
13359 n_reg = rsize;
13360 sav_ofs = 0;
13361 sav_scale = 4;
13362 if (n_reg == 2)
13363 align = 8;
13366 /* Pull the value out of the saved registers.... */
13368 lab_over = NULL;
13369 addr = create_tmp_var (ptr_type_node, "addr");
13371 /* AltiVec vectors never go in registers when -mabi=altivec. */
13372 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13373 align = 16;
13374 else
13376 lab_false = create_artificial_label (input_location);
13377 lab_over = create_artificial_label (input_location);
13379 /* Long long and SPE vectors are aligned in the registers.
13380 So are any other two-GPR items, such as complex int, due to a
13381 historical mistake. */
13382 u = reg;
13383 if (n_reg == 2 && reg == gpr)
13385 regalign = 1;
13386 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13387 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13388 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13389 unshare_expr (reg), u);
13391 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13392 reg number is 0 for f1, so we want to make it odd. */
13393 else if (reg == fpr && mode == TDmode)
13395 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13396 build_int_cst (TREE_TYPE (reg), 1));
13397 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13400 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13401 t = build2 (GE_EXPR, boolean_type_node, u, t);
13402 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13403 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13404 gimplify_and_add (t, pre_p);
13406 t = sav;
13407 if (sav_ofs)
13408 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13410 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13411 build_int_cst (TREE_TYPE (reg), n_reg));
13412 u = fold_convert (sizetype, u);
13413 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13414 t = fold_build_pointer_plus (t, u);
13416 /* _Decimal32 varargs are located in the second word of the 64-bit
13417 FP register for 32-bit binaries. */
13418 if (TARGET_32BIT
13419 && TARGET_HARD_FLOAT && TARGET_FPRS
13420 && mode == SDmode)
13421 t = fold_build_pointer_plus_hwi (t, size);
13423 gimplify_assign (addr, t, pre_p);
13425 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13427 stmt = gimple_build_label (lab_false);
13428 gimple_seq_add_stmt (pre_p, stmt);
13430 if ((n_reg == 2 && !regalign) || n_reg > 2)
13432 /* Ensure that we don't find any more args in regs.
13433 Alignment has been taken care of for special cases. */
13434 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13438 /* ... otherwise out of the overflow area. */
13440 /* Care for on-stack alignment if needed. */
13441 t = ovf;
13442 if (align != 1)
13444 t = fold_build_pointer_plus_hwi (t, align - 1);
13445 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13446 build_int_cst (TREE_TYPE (t), -align));
13448 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13450 gimplify_assign (unshare_expr (addr), t, pre_p);
13452 t = fold_build_pointer_plus_hwi (t, size);
13453 gimplify_assign (unshare_expr (ovf), t, pre_p);
13455 if (lab_over)
13457 stmt = gimple_build_label (lab_over);
13458 gimple_seq_add_stmt (pre_p, stmt);
13461 if (STRICT_ALIGNMENT
13462 && (TYPE_ALIGN (type)
13463 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13465 /* The value (of type complex double, for example) may not be
13466 aligned in memory in the saved registers, so copy via a
13467 temporary. (This is the same code as used for SPARC.) */
13468 tree tmp = create_tmp_var (type, "va_arg_tmp");
13469 tree dest_addr = build_fold_addr_expr (tmp);
13471 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13472 3, dest_addr, addr, size_int (rsize * 4));
13474 gimplify_and_add (copy, pre_p);
13475 addr = dest_addr;
13478 addr = fold_convert (ptrtype, addr);
13479 return build_va_arg_indirect_ref (addr);
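/* Editorial example (not part of the original source): for
   va_arg (ap, double) on V.4 hard-float, the code above selects
   reg = fpr, n_reg = 1, sav_ofs = 8 * 4 = 32 and sav_scale = 8, so
   while fpr < 8 the value is loaded from

	reg_save_area + 32 + fpr++ * 8

   and afterwards from the overflow area, aligned to 8 bytes.  */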
13482 /* Builtins. */
13484 static void
13485 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13487 tree t;
13488 unsigned classify = rs6000_builtin_info[(int)code].attr;
13489 const char *attr_string = "";
13491 gcc_assert (name != NULL);
13492 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13494 if (rs6000_builtin_decls[(int)code])
13495 fatal_error (input_location,
13496 "internal error: builtin function %s already processed", name);
13498 rs6000_builtin_decls[(int)code] = t =
13499 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13501 /* Set any special attributes. */
13502 if ((classify & RS6000_BTC_CONST) != 0)
13504 /* const function, function only depends on the inputs. */
13505 TREE_READONLY (t) = 1;
13506 TREE_NOTHROW (t) = 1;
13507 attr_string = ", const";
13509 else if ((classify & RS6000_BTC_PURE) != 0)
13511 /* pure function, function can read global memory, but does not set any
13512 external state. */
13513 DECL_PURE_P (t) = 1;
13514 TREE_NOTHROW (t) = 1;
13515 attr_string = ", pure";
13517 else if ((classify & RS6000_BTC_FP) != 0)
13519 /* Function is a math function. If rounding mode is on, then treat the
13520 function as not reading global memory, but it can have arbitrary side
13521 effects. If it is off, then assume the function is a const function.
13522 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13523 builtin-attribute.def that is used for the math functions. */
13524 TREE_NOTHROW (t) = 1;
13525 if (flag_rounding_math)
13527 DECL_PURE_P (t) = 1;
13528 DECL_IS_NOVOPS (t) = 1;
13529 attr_string = ", fp, pure";
13531 else
13533 TREE_READONLY (t) = 1;
13534 attr_string = ", fp, const";
13537 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13538 gcc_unreachable ();
13540 if (TARGET_DEBUG_BUILTIN)
13541 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13542 (int)code, name, attr_string);
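/* Editorial example (not part of the original source): an entry in
   rs6000-builtin.def that carries RS6000_BTC_CONST is registered here
   with TREE_READONLY and TREE_NOTHROW set, so the middle end may CSE
   two identical calls to it just as it would for a user function
   declared __attribute__ ((const, nothrow)).  */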
13545 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13547 #undef RS6000_BUILTIN_0
13548 #undef RS6000_BUILTIN_1
13549 #undef RS6000_BUILTIN_2
13550 #undef RS6000_BUILTIN_3
13551 #undef RS6000_BUILTIN_A
13552 #undef RS6000_BUILTIN_D
13553 #undef RS6000_BUILTIN_E
13554 #undef RS6000_BUILTIN_H
13555 #undef RS6000_BUILTIN_P
13556 #undef RS6000_BUILTIN_Q
13557 #undef RS6000_BUILTIN_S
13558 #undef RS6000_BUILTIN_X
13560 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13561 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13562 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13563 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13564 { MASK, ICODE, NAME, ENUM },
13566 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13567 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13568 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13569 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13570 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13571 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13572 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13573 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13575 static const struct builtin_description bdesc_3arg[] =
13577 #include "rs6000-builtin.def"
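/* Editorial note (not part of the original source) on the X-macro
   pattern used for every table in this section: rs6000-builtin.def
   routes each builtin through one of the RS6000_BUILTIN_* macros, so
   a table of one arity is populated by defining only that macro to
   emit an initializer, e.g.

	#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
	  { MASK, ICODE, NAME, ENUM },

   leaving the others empty, and then re-including the .def file.  */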
13580 /* DST operations: void foo (void *, const int, const char). */
13582 #undef RS6000_BUILTIN_0
13583 #undef RS6000_BUILTIN_1
13584 #undef RS6000_BUILTIN_2
13585 #undef RS6000_BUILTIN_3
13586 #undef RS6000_BUILTIN_A
13587 #undef RS6000_BUILTIN_D
13588 #undef RS6000_BUILTIN_E
13589 #undef RS6000_BUILTIN_H
13590 #undef RS6000_BUILTIN_P
13591 #undef RS6000_BUILTIN_Q
13592 #undef RS6000_BUILTIN_S
13593 #undef RS6000_BUILTIN_X
13595 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13596 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13597 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13598 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13599 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13600 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13601 { MASK, ICODE, NAME, ENUM },
13603 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13604 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13605 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13606 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13607 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13608 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13610 static const struct builtin_description bdesc_dst[] =
13612 #include "rs6000-builtin.def"
13615 /* Simple binary operations: VECc = foo (VECa, VECb). */
13617 #undef RS6000_BUILTIN_0
13618 #undef RS6000_BUILTIN_1
13619 #undef RS6000_BUILTIN_2
13620 #undef RS6000_BUILTIN_3
13621 #undef RS6000_BUILTIN_A
13622 #undef RS6000_BUILTIN_D
13623 #undef RS6000_BUILTIN_E
13624 #undef RS6000_BUILTIN_H
13625 #undef RS6000_BUILTIN_P
13626 #undef RS6000_BUILTIN_Q
13627 #undef RS6000_BUILTIN_S
13628 #undef RS6000_BUILTIN_X
13630 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13631 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13632 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13633 { MASK, ICODE, NAME, ENUM },
13635 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13636 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13637 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13638 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13639 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13640 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13641 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13642 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13643 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13645 static const struct builtin_description bdesc_2arg[] =
13647 #include "rs6000-builtin.def"
13650 #undef RS6000_BUILTIN_0
13651 #undef RS6000_BUILTIN_1
13652 #undef RS6000_BUILTIN_2
13653 #undef RS6000_BUILTIN_3
13654 #undef RS6000_BUILTIN_A
13655 #undef RS6000_BUILTIN_D
13656 #undef RS6000_BUILTIN_E
13657 #undef RS6000_BUILTIN_H
13658 #undef RS6000_BUILTIN_P
13659 #undef RS6000_BUILTIN_Q
13660 #undef RS6000_BUILTIN_S
13661 #undef RS6000_BUILTIN_X
13663 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13664 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13665 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13666 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13667 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13668 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13669 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13670 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13671 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13672 { MASK, ICODE, NAME, ENUM },
13674 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13675 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13676 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13678 /* AltiVec predicates. */
13680 static const struct builtin_description bdesc_altivec_preds[] =
13682 #include "rs6000-builtin.def"
13685 /* SPE predicates. */
13686 #undef RS6000_BUILTIN_0
13687 #undef RS6000_BUILTIN_1
13688 #undef RS6000_BUILTIN_2
13689 #undef RS6000_BUILTIN_3
13690 #undef RS6000_BUILTIN_A
13691 #undef RS6000_BUILTIN_D
13692 #undef RS6000_BUILTIN_E
13693 #undef RS6000_BUILTIN_H
13694 #undef RS6000_BUILTIN_P
13695 #undef RS6000_BUILTIN_Q
13696 #undef RS6000_BUILTIN_S
13697 #undef RS6000_BUILTIN_X
13699 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13700 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13701 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13702 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13703 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13704 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13705 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13706 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13707 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13708 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13709 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13710 { MASK, ICODE, NAME, ENUM },
13712 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13714 static const struct builtin_description bdesc_spe_predicates[] =
13716 #include "rs6000-builtin.def"
13719 /* SPE evsel predicates. */
13720 #undef RS6000_BUILTIN_0
13721 #undef RS6000_BUILTIN_1
13722 #undef RS6000_BUILTIN_2
13723 #undef RS6000_BUILTIN_3
13724 #undef RS6000_BUILTIN_A
13725 #undef RS6000_BUILTIN_D
13726 #undef RS6000_BUILTIN_E
13727 #undef RS6000_BUILTIN_H
13728 #undef RS6000_BUILTIN_P
13729 #undef RS6000_BUILTIN_Q
13730 #undef RS6000_BUILTIN_S
13731 #undef RS6000_BUILTIN_X
13733 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13734 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13735 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13736 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13737 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13738 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13739 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13740 { MASK, ICODE, NAME, ENUM },
13742 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13743 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13744 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13745 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13746 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13748 static const struct builtin_description bdesc_spe_evsel[] =
13750 #include "rs6000-builtin.def"
13753 /* PAIRED predicates. */
13754 #undef RS6000_BUILTIN_0
13755 #undef RS6000_BUILTIN_1
13756 #undef RS6000_BUILTIN_2
13757 #undef RS6000_BUILTIN_3
13758 #undef RS6000_BUILTIN_A
13759 #undef RS6000_BUILTIN_D
13760 #undef RS6000_BUILTIN_E
13761 #undef RS6000_BUILTIN_H
13762 #undef RS6000_BUILTIN_P
13763 #undef RS6000_BUILTIN_Q
13764 #undef RS6000_BUILTIN_S
13765 #undef RS6000_BUILTIN_X
13767 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13768 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13769 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13770 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13771 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13772 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13773 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13774 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13775 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13776 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13777 { MASK, ICODE, NAME, ENUM },
13779 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13780 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13782 static const struct builtin_description bdesc_paired_preds[] =
13784 #include "rs6000-builtin.def"
13787 /* ABS* operations. */
13789 #undef RS6000_BUILTIN_0
13790 #undef RS6000_BUILTIN_1
13791 #undef RS6000_BUILTIN_2
13792 #undef RS6000_BUILTIN_3
13793 #undef RS6000_BUILTIN_A
13794 #undef RS6000_BUILTIN_D
13795 #undef RS6000_BUILTIN_E
13796 #undef RS6000_BUILTIN_H
13797 #undef RS6000_BUILTIN_P
13798 #undef RS6000_BUILTIN_Q
13799 #undef RS6000_BUILTIN_S
13800 #undef RS6000_BUILTIN_X
13802 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13803 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13804 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13805 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13806 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13807 { MASK, ICODE, NAME, ENUM },
13809 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13810 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13811 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13812 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13813 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13814 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13815 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13817 static const struct builtin_description bdesc_abs[] =
13819 #include "rs6000-builtin.def"
13822 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13823 foo (VECa). */
13825 #undef RS6000_BUILTIN_0
13826 #undef RS6000_BUILTIN_1
13827 #undef RS6000_BUILTIN_2
13828 #undef RS6000_BUILTIN_3
13829 #undef RS6000_BUILTIN_A
13830 #undef RS6000_BUILTIN_D
13831 #undef RS6000_BUILTIN_E
13832 #undef RS6000_BUILTIN_H
13833 #undef RS6000_BUILTIN_P
13834 #undef RS6000_BUILTIN_Q
13835 #undef RS6000_BUILTIN_S
13836 #undef RS6000_BUILTIN_X
13838 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13839 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13840 { MASK, ICODE, NAME, ENUM },
13842 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13843 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13844 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13845 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13846 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13847 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13848 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13849 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13850 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13851 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13853 static const struct builtin_description bdesc_1arg[] =
13855 #include "rs6000-builtin.def"
13858 /* Simple no-argument operations: result = __builtin_darn_32 (). */
13860 #undef RS6000_BUILTIN_0
13861 #undef RS6000_BUILTIN_1
13862 #undef RS6000_BUILTIN_2
13863 #undef RS6000_BUILTIN_3
13864 #undef RS6000_BUILTIN_A
13865 #undef RS6000_BUILTIN_D
13866 #undef RS6000_BUILTIN_E
13867 #undef RS6000_BUILTIN_H
13868 #undef RS6000_BUILTIN_P
13869 #undef RS6000_BUILTIN_Q
13870 #undef RS6000_BUILTIN_S
13871 #undef RS6000_BUILTIN_X
13873 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13874 { MASK, ICODE, NAME, ENUM },
13876 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13877 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13878 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13879 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13880 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13881 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13882 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13883 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13884 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13885 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13886 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13888 static const struct builtin_description bdesc_0arg[] =
13890 #include "rs6000-builtin.def"
13893 /* HTM builtins. */
13894 #undef RS6000_BUILTIN_0
13895 #undef RS6000_BUILTIN_1
13896 #undef RS6000_BUILTIN_2
13897 #undef RS6000_BUILTIN_3
13898 #undef RS6000_BUILTIN_A
13899 #undef RS6000_BUILTIN_D
13900 #undef RS6000_BUILTIN_E
13901 #undef RS6000_BUILTIN_H
13902 #undef RS6000_BUILTIN_P
13903 #undef RS6000_BUILTIN_Q
13904 #undef RS6000_BUILTIN_S
13905 #undef RS6000_BUILTIN_X
13907 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13908 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13909 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13910 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13911 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13912 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13913 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13914 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13915 { MASK, ICODE, NAME, ENUM },
13917 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13918 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13919 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13920 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13922 static const struct builtin_description bdesc_htm[] =
13924 #include "rs6000-builtin.def"
13927 #undef RS6000_BUILTIN_0
13928 #undef RS6000_BUILTIN_1
13929 #undef RS6000_BUILTIN_2
13930 #undef RS6000_BUILTIN_3
13931 #undef RS6000_BUILTIN_A
13932 #undef RS6000_BUILTIN_D
13933 #undef RS6000_BUILTIN_E
13934 #undef RS6000_BUILTIN_H
13935 #undef RS6000_BUILTIN_P
13936 #undef RS6000_BUILTIN_Q
13937 #undef RS6000_BUILTIN_S
13939 /* Return true if a builtin function is overloaded. */
13940 bool
13941 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13943 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13946 const char *
13947 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13949 return rs6000_builtin_info[(int)fncode].name;
13952 /* Expand an expression EXP that calls a builtin without arguments. */
13953 static rtx
13954 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13956 rtx pat;
13957 machine_mode tmode = insn_data[icode].operand[0].mode;
13959 if (icode == CODE_FOR_nothing)
13960 /* Builtin not supported on this processor. */
13961 return 0;
13963 if (target == 0
13964 || GET_MODE (target) != tmode
13965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13966 target = gen_reg_rtx (tmode);
13968 pat = GEN_FCN (icode) (target);
13969 if (! pat)
13970 return 0;
13971 emit_insn (pat);
13973 return target;
13977 static rtx
13978 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13980 rtx pat;
13981 tree arg0 = CALL_EXPR_ARG (exp, 0);
13982 tree arg1 = CALL_EXPR_ARG (exp, 1);
13983 rtx op0 = expand_normal (arg0);
13984 rtx op1 = expand_normal (arg1);
13985 machine_mode mode0 = insn_data[icode].operand[0].mode;
13986 machine_mode mode1 = insn_data[icode].operand[1].mode;
13988 if (icode == CODE_FOR_nothing)
13989 /* Builtin not supported on this processor. */
13990 return 0;
13992 /* If we got invalid arguments bail out before generating bad rtl. */
13993 if (arg0 == error_mark_node || arg1 == error_mark_node)
13994 return const0_rtx;
13996 if (GET_CODE (op0) != CONST_INT
13997 || INTVAL (op0) > 255
13998 || INTVAL (op0) < 0)
14000 error ("argument 1 must be an 8-bit field value");
14001 return const0_rtx;
14004 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14005 op0 = copy_to_mode_reg (mode0, op0);
14007 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14008 op1 = copy_to_mode_reg (mode1, op1);
14010 pat = GEN_FCN (icode) (op0, op1);
14011 if (! pat)
14012 return const0_rtx;
14013 emit_insn (pat);
14015 return NULL_RTX;
14018 static rtx
14019 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14021 rtx pat;
14022 tree arg0 = CALL_EXPR_ARG (exp, 0);
14023 rtx op0 = expand_normal (arg0);
14024 machine_mode tmode = insn_data[icode].operand[0].mode;
14025 machine_mode mode0 = insn_data[icode].operand[1].mode;
14027 if (icode == CODE_FOR_nothing)
14028 /* Builtin not supported on this processor. */
14029 return 0;
14031 /* If we got invalid arguments bail out before generating bad rtl. */
14032 if (arg0 == error_mark_node)
14033 return const0_rtx;
14035 if (icode == CODE_FOR_altivec_vspltisb
14036 || icode == CODE_FOR_altivec_vspltish
14037 || icode == CODE_FOR_altivec_vspltisw
14038 || icode == CODE_FOR_spe_evsplatfi
14039 || icode == CODE_FOR_spe_evsplati)
14041 /* Only allow 5-bit *signed* literals. */
14042 if (GET_CODE (op0) != CONST_INT
14043 || INTVAL (op0) > 15
14044 || INTVAL (op0) < -16)
14046 error ("argument 1 must be a 5-bit signed literal");
14047 return const0_rtx;
14051 if (target == 0
14052 || GET_MODE (target) != tmode
14053 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14054 target = gen_reg_rtx (tmode);
14056 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14057 op0 = copy_to_mode_reg (mode0, op0);
14059 pat = GEN_FCN (icode) (target, op0);
14060 if (! pat)
14061 return 0;
14062 emit_insn (pat);
14064 return target;
14067 static rtx
14068 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14070 rtx pat, scratch1, scratch2;
14071 tree arg0 = CALL_EXPR_ARG (exp, 0);
14072 rtx op0 = expand_normal (arg0);
14073 machine_mode tmode = insn_data[icode].operand[0].mode;
14074 machine_mode mode0 = insn_data[icode].operand[1].mode;
14076 /* If we have invalid arguments, bail out before generating bad rtl. */
14077 if (arg0 == error_mark_node)
14078 return const0_rtx;
14080 if (target == 0
14081 || GET_MODE (target) != tmode
14082 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14083 target = gen_reg_rtx (tmode);
14085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14086 op0 = copy_to_mode_reg (mode0, op0);
14088 scratch1 = gen_reg_rtx (mode0);
14089 scratch2 = gen_reg_rtx (mode0);
14091 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14092 if (! pat)
14093 return 0;
14094 emit_insn (pat);
14096 return target;
14099 static rtx
14100 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14102 rtx pat;
14103 tree arg0 = CALL_EXPR_ARG (exp, 0);
14104 tree arg1 = CALL_EXPR_ARG (exp, 1);
14105 rtx op0 = expand_normal (arg0);
14106 rtx op1 = expand_normal (arg1);
14107 machine_mode tmode = insn_data[icode].operand[0].mode;
14108 machine_mode mode0 = insn_data[icode].operand[1].mode;
14109 machine_mode mode1 = insn_data[icode].operand[2].mode;
14111 if (icode == CODE_FOR_nothing)
14112 /* Builtin not supported on this processor. */
14113 return 0;
14115 /* If we got invalid arguments bail out before generating bad rtl. */
14116 if (arg0 == error_mark_node || arg1 == error_mark_node)
14117 return const0_rtx;
14119 if (icode == CODE_FOR_altivec_vcfux
14120 || icode == CODE_FOR_altivec_vcfsx
14121 || icode == CODE_FOR_altivec_vctsxs
14122 || icode == CODE_FOR_altivec_vctuxs
14123 || icode == CODE_FOR_altivec_vspltb
14124 || icode == CODE_FOR_altivec_vsplth
14125 || icode == CODE_FOR_altivec_vspltw
14126 || icode == CODE_FOR_spe_evaddiw
14127 || icode == CODE_FOR_spe_evldd
14128 || icode == CODE_FOR_spe_evldh
14129 || icode == CODE_FOR_spe_evldw
14130 || icode == CODE_FOR_spe_evlhhesplat
14131 || icode == CODE_FOR_spe_evlhhossplat
14132 || icode == CODE_FOR_spe_evlhhousplat
14133 || icode == CODE_FOR_spe_evlwhe
14134 || icode == CODE_FOR_spe_evlwhos
14135 || icode == CODE_FOR_spe_evlwhou
14136 || icode == CODE_FOR_spe_evlwhsplat
14137 || icode == CODE_FOR_spe_evlwwsplat
14138 || icode == CODE_FOR_spe_evrlwi
14139 || icode == CODE_FOR_spe_evslwi
14140 || icode == CODE_FOR_spe_evsrwis
14141 || icode == CODE_FOR_spe_evsubifw
14142 || icode == CODE_FOR_spe_evsrwiu)
14144 /* Only allow 5-bit unsigned literals. */
14145 STRIP_NOPS (arg1);
14146 if (TREE_CODE (arg1) != INTEGER_CST
14147 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14149 error ("argument 2 must be a 5-bit unsigned literal");
14150 return const0_rtx;
14153 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14154 || icode == CODE_FOR_dfptstsfi_lt_dd
14155 || icode == CODE_FOR_dfptstsfi_gt_dd
14156 || icode == CODE_FOR_dfptstsfi_unordered_dd
14157 || icode == CODE_FOR_dfptstsfi_eq_td
14158 || icode == CODE_FOR_dfptstsfi_lt_td
14159 || icode == CODE_FOR_dfptstsfi_gt_td
14160 || icode == CODE_FOR_dfptstsfi_unordered_td)
14162 /* Only allow 6-bit unsigned literals. */
14163 STRIP_NOPS (arg0);
14164 if (TREE_CODE (arg0) != INTEGER_CST
14165 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14167 error ("argument 1 must be a 6-bit unsigned literal");
14168 return CONST0_RTX (tmode);
14171 else if (icode == CODE_FOR_xststdcdp
14172 || icode == CODE_FOR_xststdcsp
14173 || icode == CODE_FOR_xvtstdcdp
14174 || icode == CODE_FOR_xvtstdcsp)
14176 /* Only allow 7-bit unsigned literals. */
14177 STRIP_NOPS (arg1);
14178 if (TREE_CODE (arg1) != INTEGER_CST
14179 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14181 error ("argument 2 must be a 7-bit unsigned literal");
14182 return CONST0_RTX (tmode);
14186 if (target == 0
14187 || GET_MODE (target) != tmode
14188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14189 target = gen_reg_rtx (tmode);
14191 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14192 op0 = copy_to_mode_reg (mode0, op0);
14193 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14194 op1 = copy_to_mode_reg (mode1, op1);
14196 pat = GEN_FCN (icode) (target, op0, op1);
14197 if (! pat)
14198 return 0;
14199 emit_insn (pat);
14201 return target;
14204 static rtx
14205 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14207 rtx pat, scratch;
14208 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14209 tree arg0 = CALL_EXPR_ARG (exp, 1);
14210 tree arg1 = CALL_EXPR_ARG (exp, 2);
14211 rtx op0 = expand_normal (arg0);
14212 rtx op1 = expand_normal (arg1);
14213 machine_mode tmode = SImode;
14214 machine_mode mode0 = insn_data[icode].operand[1].mode;
14215 machine_mode mode1 = insn_data[icode].operand[2].mode;
14216 int cr6_form_int;
14218 if (TREE_CODE (cr6_form) != INTEGER_CST)
14220 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14221 return const0_rtx;
14223 else
14224 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14226 gcc_assert (mode0 == mode1);
14228 /* If we have invalid arguments, bail out before generating bad rtl. */
14229 if (arg0 == error_mark_node || arg1 == error_mark_node)
14230 return const0_rtx;
14232 if (target == 0
14233 || GET_MODE (target) != tmode
14234 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14235 target = gen_reg_rtx (tmode);
14237 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14238 op0 = copy_to_mode_reg (mode0, op0);
14239 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14240 op1 = copy_to_mode_reg (mode1, op1);
14242 /* Note that for many of the relevant operations (e.g. cmpne or
14243 cmpeq) with float or double operands, it would make more sense for
14244 the mode of the allocated scratch register to be a vector-of-integer
14245 mode. But the choice to copy the mode of operand 0 was made
14246 long ago and there are no plans to change it. */
14247 scratch = gen_reg_rtx (mode0);
14249 pat = GEN_FCN (icode) (scratch, op0, op1);
14250 if (! pat)
14251 return 0;
14252 emit_insn (pat);
14254 /* The vec_any* and vec_all* predicates use the same opcodes for two
14255 different operations, but the bits in CR6 will be different
14256 depending on what information we want. So we have to play tricks
14257 with CR6 to get the right bits out.
14259 If you think this is disgusting, look at the specs for the
14260 AltiVec predicates. */
14262 switch (cr6_form_int)
14264 case 0:
14265 emit_insn (gen_cr6_test_for_zero (target));
14266 break;
14267 case 1:
14268 emit_insn (gen_cr6_test_for_zero_reverse (target));
14269 break;
14270 case 2:
14271 emit_insn (gen_cr6_test_for_lt (target));
14272 break;
14273 case 3:
14274 emit_insn (gen_cr6_test_for_lt_reverse (target));
14275 break;
14276 default:
14277 error ("argument 1 of __builtin_altivec_predicate is out of range");
14278 break;
14281 return target;
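/* Editorial example (not part of the original source, based on how
   altivec.h is believed to use these predicates): vec_all_eq and
   vec_any_eq expand through the same vector-compare pattern, and
   CR6_FORM only selects which CR6 test above reads the answer back,
   the plain tests serving the vec_all_* forms and the _reverse tests
   the corresponding vec_any_* forms.  */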
14284 static rtx
14285 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14287 rtx pat, addr;
14288 tree arg0 = CALL_EXPR_ARG (exp, 0);
14289 tree arg1 = CALL_EXPR_ARG (exp, 1);
14290 machine_mode tmode = insn_data[icode].operand[0].mode;
14291 machine_mode mode0 = Pmode;
14292 machine_mode mode1 = Pmode;
14293 rtx op0 = expand_normal (arg0);
14294 rtx op1 = expand_normal (arg1);
14296 if (icode == CODE_FOR_nothing)
14297 /* Builtin not supported on this processor. */
14298 return 0;
14300 /* If we got invalid arguments bail out before generating bad rtl. */
14301 if (arg0 == error_mark_node || arg1 == error_mark_node)
14302 return const0_rtx;
14304 if (target == 0
14305 || GET_MODE (target) != tmode
14306 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14307 target = gen_reg_rtx (tmode);
14309 op1 = copy_to_mode_reg (mode1, op1);
14311 if (op0 == const0_rtx)
14313 addr = gen_rtx_MEM (tmode, op1);
14315 else
14317 op0 = copy_to_mode_reg (mode0, op0);
14318 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14321 pat = GEN_FCN (icode) (target, addr);
14323 if (! pat)
14324 return 0;
14325 emit_insn (pat);
14327 return target;
14330 /* Return a constant vector for use as a little-endian permute control vector
14331 to reverse the order of elements of the given vector mode. */
14332 static rtx
14333 swap_selector_for_mode (machine_mode mode)
14335 /* These are little endian vectors, so their elements are reversed
14336 from what you would normally expect for a permute control vector. */
14337 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14338 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14339 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14340 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14341 unsigned int *swaparray, i;
14342 rtx perm[16];
14344 switch (mode)
14346 case V2DFmode:
14347 case V2DImode:
14348 swaparray = swap2;
14349 break;
14350 case V4SFmode:
14351 case V4SImode:
14352 swaparray = swap4;
14353 break;
14354 case V8HImode:
14355 swaparray = swap8;
14356 break;
14357 case V16QImode:
14358 swaparray = swap16;
14359 break;
14360 default:
14361 gcc_unreachable ();
14364 for (i = 0; i < 16; ++i)
14365 perm[i] = GEN_INT (swaparray[i]);
14367 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
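/* Editorial example (not part of the original source): for V4SImode
   the selector built above is {3,2,1,0, 7,6,5,4, 11,10,9,8,
   15,14,13,12}; a vperm of a vector with itself under this selector
   reverses its four word elements, which is exactly the element-order
   fixup the -maltivec=be expanders below need on a little-endian
   target.  */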
14370 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
14371 with -maltivec=be specified. Issue the load followed by an element-
14372 reversing permute. */
14373 void
14374 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14376 rtx tmp = gen_reg_rtx (mode);
14377 rtx load = gen_rtx_SET (tmp, op1);
14378 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14379 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14380 rtx sel = swap_selector_for_mode (mode);
14381 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14383 gcc_assert (REG_P (op0));
14384 emit_insn (par);
14385 emit_insn (gen_rtx_SET (op0, vperm));
14388 /* Generate code for a "stvxl" built-in for a little endian target with
14389 -maltivec=be specified. Issue the store preceded by an element-reversing
14390 permute. */
14391 void
14392 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14394 rtx tmp = gen_reg_rtx (mode);
14395 rtx store = gen_rtx_SET (op0, tmp);
14396 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14397 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14398 rtx sel = swap_selector_for_mode (mode);
14399 rtx vperm;
14401 gcc_assert (REG_P (op1));
14402 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14403 emit_insn (gen_rtx_SET (tmp, vperm));
14404 emit_insn (par);
14407 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14408 specified. Issue the store preceded by an element-reversing permute. */
14409 void
14410 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14412 machine_mode inner_mode = GET_MODE_INNER (mode);
14413 rtx tmp = gen_reg_rtx (mode);
14414 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14415 rtx sel = swap_selector_for_mode (mode);
14416 rtx vperm;
14418 gcc_assert (REG_P (op1));
14419 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14420 emit_insn (gen_rtx_SET (tmp, vperm));
14421 emit_insn (gen_rtx_SET (op0, stvx));
14424 static rtx
14425 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14427 rtx pat, addr;
14428 tree arg0 = CALL_EXPR_ARG (exp, 0);
14429 tree arg1 = CALL_EXPR_ARG (exp, 1);
14430 machine_mode tmode = insn_data[icode].operand[0].mode;
14431 machine_mode mode0 = Pmode;
14432 machine_mode mode1 = Pmode;
14433 rtx op0 = expand_normal (arg0);
14434 rtx op1 = expand_normal (arg1);
14436 if (icode == CODE_FOR_nothing)
14437 /* Builtin not supported on this processor. */
14438 return 0;
14440 /* If we got invalid arguments bail out before generating bad rtl. */
14441 if (arg0 == error_mark_node || arg1 == error_mark_node)
14442 return const0_rtx;
14444 if (target == 0
14445 || GET_MODE (target) != tmode
14446 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14447 target = gen_reg_rtx (tmode);
14449 op1 = copy_to_mode_reg (mode1, op1);
14451 /* For LVX, express the RTL accurately by ANDing the address with -16.
14452 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14453 so the raw address is fine. */
14454 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14455 || icode == CODE_FOR_altivec_lvx_v2di_2op
14456 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14457 || icode == CODE_FOR_altivec_lvx_v4si_2op
14458 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14459 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14461 rtx rawaddr;
14462 if (op0 == const0_rtx)
14463 rawaddr = op1;
14464 else
14466 op0 = copy_to_mode_reg (mode0, op0);
14467 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14469 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14470 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14472 /* For -maltivec=be, emit the load and follow it up with a
14473 permute to swap the elements. */
14474 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14476 rtx temp = gen_reg_rtx (tmode);
14477 emit_insn (gen_rtx_SET (temp, addr));
14479 rtx sel = swap_selector_for_mode (tmode);
14480 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14481 UNSPEC_VPERM);
14482 emit_insn (gen_rtx_SET (target, vperm));
14484 else
14485 emit_insn (gen_rtx_SET (target, addr));
14487 else
14489 if (op0 == const0_rtx)
14490 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14491 else
14493 op0 = copy_to_mode_reg (mode0, op0);
14494 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14495 gen_rtx_PLUS (Pmode, op1, op0));
14498 pat = GEN_FCN (icode) (target, addr);
14499 if (! pat)
14500 return 0;
14501 emit_insn (pat);
14504 return target;
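/* For reference, lvx ignores the low four bits of the effective
   address; the AND emitted above models that behavior explicitly.  In
   C terms the access is roughly (a sketch, not part of the expander):

     ea = (base + offset) & ~(uintptr_t) 15;   // 16-byte aligned slot

   Making this explicit in the RTL lets the optimizers see exactly
   which bytes the load can touch.  */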
14507 static rtx
14508 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14510 tree arg0 = CALL_EXPR_ARG (exp, 0);
14511 tree arg1 = CALL_EXPR_ARG (exp, 1);
14512 tree arg2 = CALL_EXPR_ARG (exp, 2);
14513 rtx op0 = expand_normal (arg0);
14514 rtx op1 = expand_normal (arg1);
14515 rtx op2 = expand_normal (arg2);
14516 rtx pat;
14517 machine_mode mode0 = insn_data[icode].operand[0].mode;
14518 machine_mode mode1 = insn_data[icode].operand[1].mode;
14519 machine_mode mode2 = insn_data[icode].operand[2].mode;
14521 /* Invalid arguments; bail out before generating bad rtl. */
14522 if (arg0 == error_mark_node
14523 || arg1 == error_mark_node
14524 || arg2 == error_mark_node)
14525 return const0_rtx;
14527 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14528 op0 = copy_to_mode_reg (mode2, op0);
14529 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14530 op1 = copy_to_mode_reg (mode0, op1);
14531 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14532 op2 = copy_to_mode_reg (mode1, op2);
14534 pat = GEN_FCN (icode) (op1, op2, op0);
14535 if (pat)
14536 emit_insn (pat);
14537 return NULL_RTX;
14540 static rtx
14541 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14543 tree arg0 = CALL_EXPR_ARG (exp, 0);
14544 tree arg1 = CALL_EXPR_ARG (exp, 1);
14545 tree arg2 = CALL_EXPR_ARG (exp, 2);
14546 rtx op0 = expand_normal (arg0);
14547 rtx op1 = expand_normal (arg1);
14548 rtx op2 = expand_normal (arg2);
14549 rtx pat, addr;
14550 machine_mode tmode = insn_data[icode].operand[0].mode;
14551 machine_mode mode1 = Pmode;
14552 machine_mode mode2 = Pmode;
14554 /* Invalid arguments; bail out before generating bad rtl. */
14555 if (arg0 == error_mark_node
14556 || arg1 == error_mark_node
14557 || arg2 == error_mark_node)
14558 return const0_rtx;
14560 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14561 op0 = copy_to_mode_reg (tmode, op0);
14563 op2 = copy_to_mode_reg (mode2, op2);
14565 if (op1 == const0_rtx)
14567 addr = gen_rtx_MEM (tmode, op2);
14569 else
14571 op1 = copy_to_mode_reg (mode1, op1);
14572 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14575 pat = GEN_FCN (icode) (addr, op0);
14576 if (pat)
14577 emit_insn (pat);
14578 return NULL_RTX;
14581 static rtx
14582 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14584 rtx pat;
14585 tree arg0 = CALL_EXPR_ARG (exp, 0);
14586 tree arg1 = CALL_EXPR_ARG (exp, 1);
14587 tree arg2 = CALL_EXPR_ARG (exp, 2);
14588 rtx op0 = expand_normal (arg0);
14589 rtx op1 = expand_normal (arg1);
14590 rtx op2 = expand_normal (arg2);
14591 machine_mode mode0 = insn_data[icode].operand[0].mode;
14592 machine_mode mode1 = insn_data[icode].operand[1].mode;
14593 machine_mode mode2 = insn_data[icode].operand[2].mode;
14595 if (icode == CODE_FOR_nothing)
14596 /* Builtin not supported on this processor. */
14597 return NULL_RTX;
14599 /* If we got invalid arguments, bail out before generating bad rtl. */
14600 if (arg0 == error_mark_node
14601 || arg1 == error_mark_node
14602 || arg2 == error_mark_node)
14603 return NULL_RTX;
14605 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14606 op0 = copy_to_mode_reg (mode0, op0);
14607 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14608 op1 = copy_to_mode_reg (mode1, op1);
14609 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14610 op2 = copy_to_mode_reg (mode2, op2);
14612 pat = GEN_FCN (icode) (op0, op1, op2);
14613 if (pat)
14614 emit_insn (pat);
14616 return NULL_RTX;
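/* Usage sketch (illustrative; requires ISA 3.0): the user-level form

     vec_xst_len (v, ptr, len);   // store only the first len bytes of v

   reaches this expander with three operands: the source vector, the
   target address, and the encoded byte length.  */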
14619 static rtx
14620 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14622 tree arg0 = CALL_EXPR_ARG (exp, 0);
14623 tree arg1 = CALL_EXPR_ARG (exp, 1);
14624 tree arg2 = CALL_EXPR_ARG (exp, 2);
14625 rtx op0 = expand_normal (arg0);
14626 rtx op1 = expand_normal (arg1);
14627 rtx op2 = expand_normal (arg2);
14628 rtx pat, addr, rawaddr;
14629 machine_mode tmode = insn_data[icode].operand[0].mode;
14630 machine_mode smode = insn_data[icode].operand[1].mode;
14631 machine_mode mode1 = Pmode;
14632 machine_mode mode2 = Pmode;
14634 /* Invalid arguments; bail out before generating bad rtl. */
14635 if (arg0 == error_mark_node
14636 || arg1 == error_mark_node
14637 || arg2 == error_mark_node)
14638 return const0_rtx;
14640 op2 = copy_to_mode_reg (mode2, op2);
14642 /* For STVX, express the RTL accurately by ANDing the address with -16.
14643 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14644 so the raw address is fine. */
14645 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14646 || icode == CODE_FOR_altivec_stvx_v2di_2op
14647 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14648 || icode == CODE_FOR_altivec_stvx_v4si_2op
14649 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14650 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14652 if (op1 == const0_rtx)
14653 rawaddr = op2;
14654 else
14656 op1 = copy_to_mode_reg (mode1, op1);
14657 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14660 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14661 addr = gen_rtx_MEM (tmode, addr);
14663 op0 = copy_to_mode_reg (tmode, op0);
14665 /* For -maltivec=be, emit a permute to swap the elements, followed
14666 by the store. */
14667 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14669 rtx temp = gen_reg_rtx (tmode);
14670 rtx sel = swap_selector_for_mode (tmode);
14671 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14672 UNSPEC_VPERM);
14673 emit_insn (gen_rtx_SET (temp, vperm));
14674 emit_insn (gen_rtx_SET (addr, temp));
14676 else
14677 emit_insn (gen_rtx_SET (addr, op0));
14679 else
14681 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14682 op0 = copy_to_mode_reg (smode, op0);
14684 if (op1 == const0_rtx)
14685 addr = gen_rtx_MEM (tmode, op2);
14686 else
14688 op1 = copy_to_mode_reg (mode1, op1);
14689 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14692 pat = GEN_FCN (icode) (addr, op0);
14693 if (pat)
14694 emit_insn (pat);
14697 return NULL_RTX;
14700 /* Return the appropriate SPR number associated with the given builtin. */
14701 static inline HOST_WIDE_INT
14702 htm_spr_num (enum rs6000_builtins code)
14704 if (code == HTM_BUILTIN_GET_TFHAR
14705 || code == HTM_BUILTIN_SET_TFHAR)
14706 return TFHAR_SPR;
14707 else if (code == HTM_BUILTIN_GET_TFIAR
14708 || code == HTM_BUILTIN_SET_TFIAR)
14709 return TFIAR_SPR;
14710 else if (code == HTM_BUILTIN_GET_TEXASR
14711 || code == HTM_BUILTIN_SET_TEXASR)
14712 return TEXASR_SPR;
14713 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14714 || code == HTM_BUILTIN_SET_TEXASRU);
14715 return TEXASRU_SPR;
14718 /* Return the appropriate SPR regno associated with the given builtin. */
14719 static inline HOST_WIDE_INT
14720 htm_spr_regno (enum rs6000_builtins code)
14722 if (code == HTM_BUILTIN_GET_TFHAR
14723 || code == HTM_BUILTIN_SET_TFHAR)
14724 return TFHAR_REGNO;
14725 else if (code == HTM_BUILTIN_GET_TFIAR
14726 || code == HTM_BUILTIN_SET_TFIAR)
14727 return TFIAR_REGNO;
14728 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14729 || code == HTM_BUILTIN_SET_TEXASR
14730 || code == HTM_BUILTIN_GET_TEXASRU
14731 || code == HTM_BUILTIN_SET_TEXASRU);
14732 return TEXASR_REGNO;
14735 /* Return the correct ICODE value depending on whether we are
14736 setting or reading the HTM SPRs. */
14737 static inline enum insn_code
14738 rs6000_htm_spr_icode (bool nonvoid)
14740 if (nonvoid)
14741 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14742 else
14743 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14746 /* Expand the HTM builtin in EXP and store the result in TARGET.
14747 Store true in *EXPANDEDP if we found a builtin to expand. */
14748 static rtx
14749 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14751 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14752 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14753 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14754 const struct builtin_description *d;
14755 size_t i;
14757 *expandedp = true;
14759 if (!TARGET_POWERPC64
14760 && (fcode == HTM_BUILTIN_TABORTDC
14761 || fcode == HTM_BUILTIN_TABORTDCI))
14763 size_t uns_fcode = (size_t)fcode;
14764 const char *name = rs6000_builtin_info[uns_fcode].name;
14765 error ("builtin %s is only valid in 64-bit mode", name);
14766 return const0_rtx;
14769 /* Expand the HTM builtins. */
14770 d = bdesc_htm;
14771 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14772 if (d->code == fcode)
14774 rtx op[MAX_HTM_OPERANDS], pat;
14775 int nopnds = 0;
14776 tree arg;
14777 call_expr_arg_iterator iter;
14778 unsigned attr = rs6000_builtin_info[fcode].attr;
14779 enum insn_code icode = d->icode;
14780 const struct insn_operand_data *insn_op;
14781 bool uses_spr = (attr & RS6000_BTC_SPR);
14782 rtx cr = NULL_RTX;
14784 if (uses_spr)
14785 icode = rs6000_htm_spr_icode (nonvoid);
14786 insn_op = &insn_data[icode].operand[0];
14788 if (nonvoid)
14790 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14791 if (!target
14792 || GET_MODE (target) != tmode
14793 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14794 target = gen_reg_rtx (tmode);
14795 if (uses_spr)
14796 op[nopnds++] = target;
14799 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14801 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14802 return const0_rtx;
14804 insn_op = &insn_data[icode].operand[nopnds];
14806 op[nopnds] = expand_normal (arg);
14808 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14810 if (!strcmp (insn_op->constraint, "n"))
14812 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14813 if (!CONST_INT_P (op[nopnds]))
14814 error ("argument %d must be an unsigned literal", arg_num);
14815 else
14816 error ("argument %d is an unsigned literal that is "
14817 "out of range", arg_num);
14818 return const0_rtx;
14820 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14823 nopnds++;
14826 /* Handle the builtins for extended mnemonics. These accept
14827 no arguments, but map to builtins that take arguments. */
14828 switch (fcode)
14830 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14831 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14832 op[nopnds++] = GEN_INT (1);
14833 if (flag_checking)
14834 attr |= RS6000_BTC_UNARY;
14835 break;
14836 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14837 op[nopnds++] = GEN_INT (0);
14838 if (flag_checking)
14839 attr |= RS6000_BTC_UNARY;
14840 break;
14841 default:
14842 break;
14845 /* If this builtin accesses SPRs, then pass in the appropriate
14846 SPR number and SPR regno as the last two operands. */
14847 if (uses_spr)
14849 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14850 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14851 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14853 /* If this builtin accesses a CR, then pass in a scratch
14854 CR as the last operand. */
14855 else if (attr & RS6000_BTC_CR)
14856 { cr = gen_reg_rtx (CCmode);
14857 op[nopnds++] = cr;
14860 if (flag_checking)
14862 int expected_nopnds = 0;
14863 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14864 expected_nopnds = 1;
14865 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14866 expected_nopnds = 2;
14867 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14868 expected_nopnds = 3;
14869 if (!(attr & RS6000_BTC_VOID))
14870 expected_nopnds += 1;
14871 if (uses_spr)
14872 expected_nopnds += 2;
14874 gcc_assert (nopnds == expected_nopnds
14875 && nopnds <= MAX_HTM_OPERANDS);
14878 switch (nopnds)
14880 case 1:
14881 pat = GEN_FCN (icode) (op[0]);
14882 break;
14883 case 2:
14884 pat = GEN_FCN (icode) (op[0], op[1]);
14885 break;
14886 case 3:
14887 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14888 break;
14889 case 4:
14890 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14891 break;
14892 default:
14893 gcc_unreachable ();
14895 if (!pat)
14896 return NULL_RTX;
14897 emit_insn (pat);
14899 if (attr & RS6000_BTC_CR)
14901 if (fcode == HTM_BUILTIN_TBEGIN)
14903 /* Emit code to set TARGET to true or false depending on
14904 whether the tbegin. instruction succeeded or failed
14905 to start a transaction. We do this by placing the 1's
14906 complement of CR's EQ bit into TARGET. */
14907 rtx scratch = gen_reg_rtx (SImode);
14908 emit_insn (gen_rtx_SET (scratch,
14909 gen_rtx_EQ (SImode, cr,
14910 const0_rtx)));
14911 emit_insn (gen_rtx_SET (target,
14912 gen_rtx_XOR (SImode, scratch,
14913 GEN_INT (1))));
14915 else
14917 /* Emit code to copy the 4-bit condition register field
14918 CR into the least significant end of register TARGET. */
14919 rtx scratch1 = gen_reg_rtx (SImode);
14920 rtx scratch2 = gen_reg_rtx (SImode);
14921 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14922 emit_insn (gen_movcc (subreg, cr));
14923 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14924 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14928 if (nonvoid)
14929 return target;
14930 return const0_rtx;
14933 *expandedp = false;
14934 return NULL_RTX;
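/* User-level sketch of the CR handling above (illustrative):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }
     else
       ... fallback path ...

   __builtin_tbegin yields 1 when the transaction starts and 0 when it
   fails, i.e. the inverted EQ bit computed above.  */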
14937 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14939 static rtx
14940 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14941 rtx target)
14943 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14944 if (fcode == RS6000_BUILTIN_CPU_INIT)
14945 return const0_rtx;
14947 if (target == 0 || GET_MODE (target) != SImode)
14948 target = gen_reg_rtx (SImode);
14950 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14951 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14952 if (TREE_CODE (arg) != STRING_CST)
14954 error ("builtin %s only accepts a string argument",
14955 rs6000_builtin_info[(size_t) fcode].name);
14956 return const0_rtx;
14959 if (fcode == RS6000_BUILTIN_CPU_IS)
14961 const char *cpu = TREE_STRING_POINTER (arg);
14962 rtx cpuid = NULL_RTX;
14963 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14964 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14966 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14967 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14968 break;
14970 if (cpuid == NULL_RTX)
14972 /* Invalid CPU argument. */
14973 error ("cpu %s is an invalid argument to builtin %s",
14974 cpu, rs6000_builtin_info[(size_t) fcode].name);
14975 return const0_rtx;
14978 rtx platform = gen_reg_rtx (SImode);
14979 rtx tcbmem = gen_const_mem (SImode,
14980 gen_rtx_PLUS (Pmode,
14981 gen_rtx_REG (Pmode, TLS_REGNUM),
14982 GEN_INT (TCB_PLATFORM_OFFSET)));
14983 emit_move_insn (platform, tcbmem);
14984 emit_insn (gen_eqsi3 (target, platform, cpuid));
14986 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14988 const char *hwcap = TREE_STRING_POINTER (arg);
14989 rtx mask = NULL_RTX;
14990 int hwcap_offset;
14991 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14992 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14994 mask = GEN_INT (cpu_supports_info[i].mask);
14995 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14996 break;
14998 if (mask == NULL_RTX)
15000 /* Invalid HWCAP argument. */
15001 error ("hwcap %s is an invalid argument to builtin %s",
15002 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15003 return const0_rtx;
15006 rtx tcb_hwcap = gen_reg_rtx (SImode);
15007 rtx tcbmem = gen_const_mem (SImode,
15008 gen_rtx_PLUS (Pmode,
15009 gen_rtx_REG (Pmode, TLS_REGNUM),
15010 GEN_INT (hwcap_offset)));
15011 emit_move_insn (tcb_hwcap, tcbmem);
15012 rtx scratch1 = gen_reg_rtx (SImode);
15013 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15014 rtx scratch2 = gen_reg_rtx (SImode);
15015 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15016 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15019 /* Record that we have expanded a CPU builtin, so that we can later
15020 emit a reference to the special symbol exported by LIBC to ensure we
15021 do not link against an old LIBC that doesn't support this feature. */
15022 cpu_builtin_p = true;
15024 #else
15025 /* For old LIBCs, always return FALSE. */
15026 emit_move_insn (target, GEN_INT (0));
15027 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15029 return target;
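/* Usage sketch (illustrative; the strings are example entries from
   cpu_is_info and cpu_supports_info):

     if (__builtin_cpu_is ("power9"))
       ... CPU-specific fast path ...
     if (__builtin_cpu_supports ("vsx"))
       ... VSX path ...

   When the TCB provides the HWCAP words (the #ifdef above), each test
   expands to a single thread-pointer-relative load plus a compare or
   mask; no library call is emitted.  */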
15032 static rtx
15033 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15035 rtx pat;
15036 tree arg0 = CALL_EXPR_ARG (exp, 0);
15037 tree arg1 = CALL_EXPR_ARG (exp, 1);
15038 tree arg2 = CALL_EXPR_ARG (exp, 2);
15039 rtx op0 = expand_normal (arg0);
15040 rtx op1 = expand_normal (arg1);
15041 rtx op2 = expand_normal (arg2);
15042 machine_mode tmode = insn_data[icode].operand[0].mode;
15043 machine_mode mode0 = insn_data[icode].operand[1].mode;
15044 machine_mode mode1 = insn_data[icode].operand[2].mode;
15045 machine_mode mode2 = insn_data[icode].operand[3].mode;
15047 if (icode == CODE_FOR_nothing)
15048 /* Builtin not supported on this processor. */
15049 return 0;
15051 /* If we got invalid arguments, bail out before generating bad rtl. */
15052 if (arg0 == error_mark_node
15053 || arg1 == error_mark_node
15054 || arg2 == error_mark_node)
15055 return const0_rtx;
15057 /* Check and prepare arguments depending on the instruction code.
15059 Note that a switch statement instead of the sequence of tests
15060 would be incorrect as many of the CODE_FOR values could be
15061 CODE_FOR_nothing and that would yield multiple alternatives
15062 with identical values. We'd never reach here at runtime in
15063 this case. */
15064 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15065 || icode == CODE_FOR_altivec_vsldoi_v4si
15066 || icode == CODE_FOR_altivec_vsldoi_v8hi
15067 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15069 /* Only allow 4-bit unsigned literals. */
15070 STRIP_NOPS (arg2);
15071 if (TREE_CODE (arg2) != INTEGER_CST
15072 || TREE_INT_CST_LOW (arg2) & ~0xf)
15074 error ("argument 3 must be a 4-bit unsigned literal");
15075 return const0_rtx;
15078 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15079 || icode == CODE_FOR_vsx_xxpermdi_v2di
15080 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15081 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15082 || icode == CODE_FOR_vsx_xxsldwi_v4si
15083 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15084 || icode == CODE_FOR_vsx_xxsldwi_v2di
15085 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15087 /* Only allow 2-bit unsigned literals. */
15088 STRIP_NOPS (arg2);
15089 if (TREE_CODE (arg2) != INTEGER_CST
15090 || TREE_INT_CST_LOW (arg2) & ~0x3)
15092 error ("argument 3 must be a 2-bit unsigned literal");
15093 return const0_rtx;
15096 else if (icode == CODE_FOR_vsx_set_v2df
15097 || icode == CODE_FOR_vsx_set_v2di
15098 || icode == CODE_FOR_bcdadd
15099 || icode == CODE_FOR_bcdadd_lt
15100 || icode == CODE_FOR_bcdadd_eq
15101 || icode == CODE_FOR_bcdadd_gt
15102 || icode == CODE_FOR_bcdsub
15103 || icode == CODE_FOR_bcdsub_lt
15104 || icode == CODE_FOR_bcdsub_eq
15105 || icode == CODE_FOR_bcdsub_gt)
15107 /* Only allow 1-bit unsigned literals. */
15108 STRIP_NOPS (arg2);
15109 if (TREE_CODE (arg2) != INTEGER_CST
15110 || TREE_INT_CST_LOW (arg2) & ~0x1)
15112 error ("argument 3 must be a 1-bit unsigned literal");
15113 return const0_rtx;
15116 else if (icode == CODE_FOR_dfp_ddedpd_dd
15117 || icode == CODE_FOR_dfp_ddedpd_td)
15119 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15120 STRIP_NOPS (arg0);
15121 if (TREE_CODE (arg0) != INTEGER_CST
15122 || TREE_INT_CST_LOW (arg0) & ~0x3)
15124 error ("argument 1 must be 0 or 2");
15125 return const0_rtx;
15128 else if (icode == CODE_FOR_dfp_denbcd_dd
15129 || icode == CODE_FOR_dfp_denbcd_td)
15131 /* Only allow 1-bit unsigned literals. */
15132 STRIP_NOPS (arg0);
15133 if (TREE_CODE (arg0) != INTEGER_CST
15134 || TREE_INT_CST_LOW (arg0) & ~0x1)
15136 error ("argument 1 must be a 1-bit unsigned literal");
15137 return const0_rtx;
15140 else if (icode == CODE_FOR_dfp_dscli_dd
15141 || icode == CODE_FOR_dfp_dscli_td
15142 || icode == CODE_FOR_dfp_dscri_dd
15143 || icode == CODE_FOR_dfp_dscri_td)
15145 /* Only allow 6-bit unsigned literals. */
15146 STRIP_NOPS (arg1);
15147 if (TREE_CODE (arg1) != INTEGER_CST
15148 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15150 error ("argument 2 must be a 6-bit unsigned literal");
15151 return const0_rtx;
15154 else if (icode == CODE_FOR_crypto_vshasigmaw
15155 || icode == CODE_FOR_crypto_vshasigmad)
15157 /* Check whether the 2nd and 3rd arguments are integer constants and in
15158 range and prepare arguments. */
15159 STRIP_NOPS (arg1);
15160 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15162 error ("argument 2 must be 0 or 1");
15163 return const0_rtx;
15166 STRIP_NOPS (arg2);
15167 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15169 error ("argument 3 must be in the range 0..15");
15170 return const0_rtx;
15174 if (target == 0
15175 || GET_MODE (target) != tmode
15176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15177 target = gen_reg_rtx (tmode);
15179 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15180 op0 = copy_to_mode_reg (mode0, op0);
15181 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15182 op1 = copy_to_mode_reg (mode1, op1);
15183 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15184 op2 = copy_to_mode_reg (mode2, op2);
15186 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15187 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15188 else
15189 pat = GEN_FCN (icode) (target, op0, op1, op2);
15190 if (! pat)
15191 return 0;
15192 emit_insn (pat);
15194 return target;
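/* Example of the literal-range checks above (illustrative): vec_sld
   maps to altivec_vsldoi_*, so its third argument must be a 4-bit
   compile-time constant:

     r = vec_sld (a, b, 3);    // accepted: 3 fits in 4 bits
     r = vec_sld (a, b, n);    // rejected: not an INTEGER_CST
     r = vec_sld (a, b, 99);   // rejected: 99 & ~0xf is nonzero  */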
15197 /* Expand the lvx builtins. */
15198 static rtx
15199 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15201 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15202 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15203 tree arg0;
15204 machine_mode tmode, mode0;
15205 rtx pat, op0;
15206 enum insn_code icode;
15208 switch (fcode)
15210 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15211 icode = CODE_FOR_vector_altivec_load_v16qi;
15212 break;
15213 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15214 icode = CODE_FOR_vector_altivec_load_v8hi;
15215 break;
15216 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15217 icode = CODE_FOR_vector_altivec_load_v4si;
15218 break;
15219 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15220 icode = CODE_FOR_vector_altivec_load_v4sf;
15221 break;
15222 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15223 icode = CODE_FOR_vector_altivec_load_v2df;
15224 break;
15225 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15226 icode = CODE_FOR_vector_altivec_load_v2di;
15227 break;
15228 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15229 icode = CODE_FOR_vector_altivec_load_v1ti;
15230 break;
15231 default:
15232 *expandedp = false;
15233 return NULL_RTX;
15236 *expandedp = true;
15238 arg0 = CALL_EXPR_ARG (exp, 0);
15239 op0 = expand_normal (arg0);
15240 tmode = insn_data[icode].operand[0].mode;
15241 mode0 = insn_data[icode].operand[1].mode;
15243 if (target == 0
15244 || GET_MODE (target) != tmode
15245 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15246 target = gen_reg_rtx (tmode);
15248 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15249 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15251 pat = GEN_FCN (icode) (target, op0);
15252 if (! pat)
15253 return 0;
15254 emit_insn (pat);
15255 return target;
15258 /* Expand the stvx builtins. */
15259 static rtx
15260 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15261 bool *expandedp)
15263 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15264 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15265 tree arg0, arg1;
15266 machine_mode mode0, mode1;
15267 rtx pat, op0, op1;
15268 enum insn_code icode;
15270 switch (fcode)
15272 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15273 icode = CODE_FOR_vector_altivec_store_v16qi;
15274 break;
15275 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15276 icode = CODE_FOR_vector_altivec_store_v8hi;
15277 break;
15278 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15279 icode = CODE_FOR_vector_altivec_store_v4si;
15280 break;
15281 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15282 icode = CODE_FOR_vector_altivec_store_v4sf;
15283 break;
15284 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15285 icode = CODE_FOR_vector_altivec_store_v2df;
15286 break;
15287 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15288 icode = CODE_FOR_vector_altivec_store_v2di;
15289 break;
15290 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15291 icode = CODE_FOR_vector_altivec_store_v1ti;
15292 break;
15293 default:
15294 *expandedp = false;
15295 return NULL_RTX;
15298 arg0 = CALL_EXPR_ARG (exp, 0);
15299 arg1 = CALL_EXPR_ARG (exp, 1);
15300 op0 = expand_normal (arg0);
15301 op1 = expand_normal (arg1);
15302 mode0 = insn_data[icode].operand[0].mode;
15303 mode1 = insn_data[icode].operand[1].mode;
15305 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15306 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15307 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15308 op1 = copy_to_mode_reg (mode1, op1);
15310 pat = GEN_FCN (icode) (op0, op1);
15311 if (pat)
15312 emit_insn (pat);
15314 *expandedp = true;
15315 return NULL_RTX;
15318 /* Expand the dst builtins. */
15319 static rtx
15320 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15321 bool *expandedp)
15323 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15324 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15325 tree arg0, arg1, arg2;
15326 machine_mode mode0, mode1;
15327 rtx pat, op0, op1, op2;
15328 const struct builtin_description *d;
15329 size_t i;
15331 *expandedp = false;
15333 /* Handle DST variants. */
15334 d = bdesc_dst;
15335 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15336 if (d->code == fcode)
15338 arg0 = CALL_EXPR_ARG (exp, 0);
15339 arg1 = CALL_EXPR_ARG (exp, 1);
15340 arg2 = CALL_EXPR_ARG (exp, 2);
15341 op0 = expand_normal (arg0);
15342 op1 = expand_normal (arg1);
15343 op2 = expand_normal (arg2);
15344 mode0 = insn_data[d->icode].operand[0].mode;
15345 mode1 = insn_data[d->icode].operand[1].mode;
15347 /* Invalid arguments; bail out before generating bad rtl. */
15348 if (arg0 == error_mark_node
15349 || arg1 == error_mark_node
15350 || arg2 == error_mark_node)
15351 return const0_rtx;
15353 *expandedp = true;
15354 STRIP_NOPS (arg2);
15355 if (TREE_CODE (arg2) != INTEGER_CST
15356 || TREE_INT_CST_LOW (arg2) & ~0x3)
15358 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15359 return const0_rtx;
15362 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15363 op0 = copy_to_mode_reg (Pmode, op0);
15364 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15365 op1 = copy_to_mode_reg (mode1, op1);
15367 pat = GEN_FCN (d->icode) (op0, op1, op2);
15368 if (pat != 0)
15369 emit_insn (pat);
15371 return NULL_RTX;
15374 return NULL_RTX;
15377 /* Expand vec_init builtin. */
15378 static rtx
15379 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15381 machine_mode tmode = TYPE_MODE (type);
15382 machine_mode inner_mode = GET_MODE_INNER (tmode);
15383 int i, n_elt = GET_MODE_NUNITS (tmode);
15385 gcc_assert (VECTOR_MODE_P (tmode));
15386 gcc_assert (n_elt == call_expr_nargs (exp));
15388 if (!target || !register_operand (target, tmode))
15389 target = gen_reg_rtx (tmode);
15391 /* If we have a vector composed of a single element, such as V1TImode, do
15392 the initialization directly. */
15393 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15395 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15396 emit_move_insn (target, gen_lowpart (tmode, x));
15398 else
15400 rtvec v = rtvec_alloc (n_elt);
15402 for (i = 0; i < n_elt; ++i)
15404 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15405 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15408 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15411 return target;
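/* For instance (illustrative), a V4SI vec_init with four scalar
   arguments builds (parallel [a b c d]) and hands it to
   rs6000_expand_vector_init to pick a splat, merge, or load sequence;
   the single-element V1TI case above short-circuits to a plain move.  */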
15414 /* Return the integer constant in ARG. Constrain it to be in the range
15415 of the subparts of VEC_TYPE; issue an error if not. */
15417 static int
15418 get_element_number (tree vec_type, tree arg)
15420 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15422 if (!tree_fits_uhwi_p (arg)
15423 || (elt = tree_to_uhwi (arg), elt > max))
15425 error ("selector must be an integer constant in the range 0..%wi", max);
15426 return 0;
15429 return elt;
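/* E.g. (illustrative) for a V4SI vector TYPE_VECTOR_SUBPARTS is 4, so
   valid selectors are 0..3 and a constant selector of 7 is diagnosed
   here.  */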
15432 /* Expand vec_set builtin. */
15433 static rtx
15434 altivec_expand_vec_set_builtin (tree exp)
15436 machine_mode tmode, mode1;
15437 tree arg0, arg1, arg2;
15438 int elt;
15439 rtx op0, op1;
15441 arg0 = CALL_EXPR_ARG (exp, 0);
15442 arg1 = CALL_EXPR_ARG (exp, 1);
15443 arg2 = CALL_EXPR_ARG (exp, 2);
15445 tmode = TYPE_MODE (TREE_TYPE (arg0));
15446 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15447 gcc_assert (VECTOR_MODE_P (tmode));
15449 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15450 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15451 elt = get_element_number (TREE_TYPE (arg0), arg2);
15453 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15454 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15456 op0 = force_reg (tmode, op0);
15457 op1 = force_reg (mode1, op1);
15459 rs6000_expand_vector_set (op0, op1, elt);
15461 return op0;
15464 /* Expand vec_ext builtin. */
15465 static rtx
15466 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15468 machine_mode tmode, mode0;
15469 tree arg0, arg1;
15470 rtx op0;
15471 rtx op1;
15473 arg0 = CALL_EXPR_ARG (exp, 0);
15474 arg1 = CALL_EXPR_ARG (exp, 1);
15476 op0 = expand_normal (arg0);
15477 op1 = expand_normal (arg1);
15479 /* Call get_element_number to validate arg1 if it is a constant. */
15480 if (TREE_CODE (arg1) == INTEGER_CST)
15481 (void) get_element_number (TREE_TYPE (arg0), arg1);
15483 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15484 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15485 gcc_assert (VECTOR_MODE_P (mode0));
15487 op0 = force_reg (mode0, op0);
15489 if (optimize || !target || !register_operand (target, tmode))
15490 target = gen_reg_rtx (tmode);
15492 rs6000_expand_vector_extract (target, op0, op1);
15494 return target;
15497 /* Expand the builtin in EXP and store the result in TARGET. Store
15498 true in *EXPANDEDP if we found a builtin to expand. */
15499 static rtx
15500 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15502 const struct builtin_description *d;
15503 size_t i;
15504 enum insn_code icode;
15505 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15506 tree arg0;
15507 rtx op0, pat;
15508 machine_mode tmode, mode0;
15509 enum rs6000_builtins fcode
15510 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15512 if (rs6000_overloaded_builtin_p (fcode))
15514 *expandedp = true;
15515 error ("unresolved overload for Altivec builtin %qF", fndecl);
15517 /* Given it is invalid, just generate a normal call. */
15518 return expand_call (exp, target, false);
15521 target = altivec_expand_ld_builtin (exp, target, expandedp);
15522 if (*expandedp)
15523 return target;
15525 target = altivec_expand_st_builtin (exp, target, expandedp);
15526 if (*expandedp)
15527 return target;
15529 target = altivec_expand_dst_builtin (exp, target, expandedp);
15530 if (*expandedp)
15531 return target;
15533 *expandedp = true;
15535 switch (fcode)
15537 case ALTIVEC_BUILTIN_STVX_V2DF:
15538 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15539 case ALTIVEC_BUILTIN_STVX_V2DI:
15540 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15541 case ALTIVEC_BUILTIN_STVX_V4SF:
15542 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15543 case ALTIVEC_BUILTIN_STVX:
15544 case ALTIVEC_BUILTIN_STVX_V4SI:
15545 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15546 case ALTIVEC_BUILTIN_STVX_V8HI:
15547 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15548 case ALTIVEC_BUILTIN_STVX_V16QI:
15549 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15550 case ALTIVEC_BUILTIN_STVEBX:
15551 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15552 case ALTIVEC_BUILTIN_STVEHX:
15553 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15554 case ALTIVEC_BUILTIN_STVEWX:
15555 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15556 case ALTIVEC_BUILTIN_STVXL_V2DF:
15557 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15558 case ALTIVEC_BUILTIN_STVXL_V2DI:
15559 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15560 case ALTIVEC_BUILTIN_STVXL_V4SF:
15561 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15562 case ALTIVEC_BUILTIN_STVXL:
15563 case ALTIVEC_BUILTIN_STVXL_V4SI:
15564 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15565 case ALTIVEC_BUILTIN_STVXL_V8HI:
15566 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15567 case ALTIVEC_BUILTIN_STVXL_V16QI:
15568 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15570 case ALTIVEC_BUILTIN_STVLX:
15571 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15572 case ALTIVEC_BUILTIN_STVLXL:
15573 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15574 case ALTIVEC_BUILTIN_STVRX:
15575 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15576 case ALTIVEC_BUILTIN_STVRXL:
15577 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15579 case P9V_BUILTIN_STXVL:
15580 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15582 case VSX_BUILTIN_STXVD2X_V1TI:
15583 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15584 case VSX_BUILTIN_STXVD2X_V2DF:
15585 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15586 case VSX_BUILTIN_STXVD2X_V2DI:
15587 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15588 case VSX_BUILTIN_STXVW4X_V4SF:
15589 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15590 case VSX_BUILTIN_STXVW4X_V4SI:
15591 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15592 case VSX_BUILTIN_STXVW4X_V8HI:
15593 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15594 case VSX_BUILTIN_STXVW4X_V16QI:
15595 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15597 /* For the following on big endian, it's ok to use any appropriate
15598 unaligned-supporting store, so use a generic expander. For
15599 little-endian, the exact element-reversing instruction must
15600 be used. */
15601 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15603 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15604 : CODE_FOR_vsx_st_elemrev_v2df);
15605 return altivec_expand_stv_builtin (code, exp);
15607 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15609 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15610 : CODE_FOR_vsx_st_elemrev_v2di);
15611 return altivec_expand_stv_builtin (code, exp);
15613 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15615 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15616 : CODE_FOR_vsx_st_elemrev_v4sf);
15617 return altivec_expand_stv_builtin (code, exp);
15619 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15621 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15622 : CODE_FOR_vsx_st_elemrev_v4si);
15623 return altivec_expand_stv_builtin (code, exp);
15625 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15627 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15628 : CODE_FOR_vsx_st_elemrev_v8hi);
15629 return altivec_expand_stv_builtin (code, exp);
15631 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15633 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15634 : CODE_FOR_vsx_st_elemrev_v16qi);
15635 return altivec_expand_stv_builtin (code, exp);
15638 case ALTIVEC_BUILTIN_MFVSCR:
15639 icode = CODE_FOR_altivec_mfvscr;
15640 tmode = insn_data[icode].operand[0].mode;
15642 if (target == 0
15643 || GET_MODE (target) != tmode
15644 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15645 target = gen_reg_rtx (tmode);
15647 pat = GEN_FCN (icode) (target);
15648 if (! pat)
15649 return 0;
15650 emit_insn (pat);
15651 return target;
15653 case ALTIVEC_BUILTIN_MTVSCR:
15654 icode = CODE_FOR_altivec_mtvscr;
15655 arg0 = CALL_EXPR_ARG (exp, 0);
15656 op0 = expand_normal (arg0);
15657 mode0 = insn_data[icode].operand[0].mode;
15659 /* If we got invalid arguments, bail out before generating bad rtl. */
15660 if (arg0 == error_mark_node)
15661 return const0_rtx;
15663 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15664 op0 = copy_to_mode_reg (mode0, op0);
15666 pat = GEN_FCN (icode) (op0);
15667 if (pat)
15668 emit_insn (pat);
15669 return NULL_RTX;
15671 case ALTIVEC_BUILTIN_DSSALL:
15672 emit_insn (gen_altivec_dssall ());
15673 return NULL_RTX;
15675 case ALTIVEC_BUILTIN_DSS:
15676 icode = CODE_FOR_altivec_dss;
15677 arg0 = CALL_EXPR_ARG (exp, 0);
15678 STRIP_NOPS (arg0);
15679 op0 = expand_normal (arg0);
15680 mode0 = insn_data[icode].operand[0].mode;
15682 /* If we got invalid arguments, bail out before generating bad rtl. */
15683 if (arg0 == error_mark_node)
15684 return const0_rtx;
15686 if (TREE_CODE (arg0) != INTEGER_CST
15687 || TREE_INT_CST_LOW (arg0) & ~0x3)
15689 error ("argument to dss must be a 2-bit unsigned literal");
15690 return const0_rtx;
15693 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15694 op0 = copy_to_mode_reg (mode0, op0);
15696 emit_insn (gen_altivec_dss (op0));
15697 return NULL_RTX;
15699 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15700 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15701 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15702 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15703 case VSX_BUILTIN_VEC_INIT_V2DF:
15704 case VSX_BUILTIN_VEC_INIT_V2DI:
15705 case VSX_BUILTIN_VEC_INIT_V1TI:
15706 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15708 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15709 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15710 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15711 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15712 case VSX_BUILTIN_VEC_SET_V2DF:
15713 case VSX_BUILTIN_VEC_SET_V2DI:
15714 case VSX_BUILTIN_VEC_SET_V1TI:
15715 return altivec_expand_vec_set_builtin (exp);
15717 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15718 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15719 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15720 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15721 case VSX_BUILTIN_VEC_EXT_V2DF:
15722 case VSX_BUILTIN_VEC_EXT_V2DI:
15723 case VSX_BUILTIN_VEC_EXT_V1TI:
15724 return altivec_expand_vec_ext_builtin (exp, target);
15726 default:
15727 break;
15728 /* Fall through to the table-driven expanders below. */
15731 /* Expand abs* operations. */
15732 d = bdesc_abs;
15733 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15734 if (d->code == fcode)
15735 return altivec_expand_abs_builtin (d->icode, exp, target);
15737 /* Expand the AltiVec predicates. */
15738 d = bdesc_altivec_preds;
15739 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15740 if (d->code == fcode)
15741 return altivec_expand_predicate_builtin (d->icode, exp, target);
15743 /* The LV* builtins are funky; they are initialized differently from the rest, so handle them individually here. */
15744 switch (fcode)
15746 case ALTIVEC_BUILTIN_LVSL:
15747 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15748 exp, target, false);
15749 case ALTIVEC_BUILTIN_LVSR:
15750 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15751 exp, target, false);
15752 case ALTIVEC_BUILTIN_LVEBX:
15753 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15754 exp, target, false);
15755 case ALTIVEC_BUILTIN_LVEHX:
15756 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15757 exp, target, false);
15758 case ALTIVEC_BUILTIN_LVEWX:
15759 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15760 exp, target, false);
15761 case ALTIVEC_BUILTIN_LVXL_V2DF:
15762 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15763 exp, target, false);
15764 case ALTIVEC_BUILTIN_LVXL_V2DI:
15765 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15766 exp, target, false);
15767 case ALTIVEC_BUILTIN_LVXL_V4SF:
15768 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15769 exp, target, false);
15770 case ALTIVEC_BUILTIN_LVXL:
15771 case ALTIVEC_BUILTIN_LVXL_V4SI:
15772 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15773 exp, target, false);
15774 case ALTIVEC_BUILTIN_LVXL_V8HI:
15775 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15776 exp, target, false);
15777 case ALTIVEC_BUILTIN_LVXL_V16QI:
15778 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15779 exp, target, false);
15780 case ALTIVEC_BUILTIN_LVX_V2DF:
15781 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15782 exp, target, false);
15783 case ALTIVEC_BUILTIN_LVX_V2DI:
15784 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15785 exp, target, false);
15786 case ALTIVEC_BUILTIN_LVX_V4SF:
15787 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15788 exp, target, false);
15789 case ALTIVEC_BUILTIN_LVX:
15790 case ALTIVEC_BUILTIN_LVX_V4SI:
15791 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15792 exp, target, false);
15793 case ALTIVEC_BUILTIN_LVX_V8HI:
15794 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15795 exp, target, false);
15796 case ALTIVEC_BUILTIN_LVX_V16QI:
15797 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15798 exp, target, false);
15799 case ALTIVEC_BUILTIN_LVLX:
15800 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15801 exp, target, true);
15802 case ALTIVEC_BUILTIN_LVLXL:
15803 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15804 exp, target, true);
15805 case ALTIVEC_BUILTIN_LVRX:
15806 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15807 exp, target, true);
15808 case ALTIVEC_BUILTIN_LVRXL:
15809 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15810 exp, target, true);
15811 case VSX_BUILTIN_LXVD2X_V1TI:
15812 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15813 exp, target, false);
15814 case VSX_BUILTIN_LXVD2X_V2DF:
15815 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15816 exp, target, false);
15817 case VSX_BUILTIN_LXVD2X_V2DI:
15818 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15819 exp, target, false);
15820 case VSX_BUILTIN_LXVW4X_V4SF:
15821 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15822 exp, target, false);
15823 case VSX_BUILTIN_LXVW4X_V4SI:
15824 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15825 exp, target, false);
15826 case VSX_BUILTIN_LXVW4X_V8HI:
15827 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15828 exp, target, false);
15829 case VSX_BUILTIN_LXVW4X_V16QI:
15830 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15831 exp, target, false);
15832 /* For the following on big endian, it's ok to use any appropriate
15833 unaligned-supporting load, so use a generic expander. For
15834 little-endian, the exact element-reversing instruction must
15835 be used. */
15836 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15838 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15839 : CODE_FOR_vsx_ld_elemrev_v2df);
15840 return altivec_expand_lv_builtin (code, exp, target, false);
15842 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15844 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15845 : CODE_FOR_vsx_ld_elemrev_v2di);
15846 return altivec_expand_lv_builtin (code, exp, target, false);
15848 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15850 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15851 : CODE_FOR_vsx_ld_elemrev_v4sf);
15852 return altivec_expand_lv_builtin (code, exp, target, false);
15854 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15856 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15857 : CODE_FOR_vsx_ld_elemrev_v4si);
15858 return altivec_expand_lv_builtin (code, exp, target, false);
15860 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15862 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15863 : CODE_FOR_vsx_ld_elemrev_v8hi);
15864 return altivec_expand_lv_builtin (code, exp, target, false);
15866 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15868 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15869 : CODE_FOR_vsx_ld_elemrev_v16qi);
15870 return altivec_expand_lv_builtin (code, exp, target, false);
15872 break;
15873 default:
15874 break;
15875 /* Fall through to the failure return below. */
15878 *expandedp = false;
15879 return NULL_RTX;
15882 /* Expand the builtin in EXP and store the result in TARGET. Store
15883 true in *EXPANDEDP if we found a builtin to expand. */
15884 static rtx
15885 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15887 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15888 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15889 const struct builtin_description *d;
15890 size_t i;
15892 *expandedp = true;
15894 switch (fcode)
15896 case PAIRED_BUILTIN_STX:
15897 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15898 case PAIRED_BUILTIN_LX:
15899 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15900 default:
15901 break;
15902 /* Fall through to the predicate expanders below. */
15905 /* Expand the paired predicates. */
15906 d = bdesc_paired_preds;
15907 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15908 if (d->code == fcode)
15909 return paired_expand_predicate_builtin (d->icode, exp, target);
15911 *expandedp = false;
15912 return NULL_RTX;
15915 /* Binops that need to be initialized manually, but can be expanded
15916 automagically by rs6000_expand_binop_builtin. */
15917 static const struct builtin_description bdesc_2arg_spe[] =
15919 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15920 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15921 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15922 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15923 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15924 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15925 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15926 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15927 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15928 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15929 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15930 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15931 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15932 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15933 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15934 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15935 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15936 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15937 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15938 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15939 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15940 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15943 /* Expand the builtin in EXP and store the result in TARGET. Store
15944 true in *EXPANDEDP if we found a builtin to expand.
15946 This expands the SPE builtins that are not simple unary and binary
15947 operations. */
15948 static rtx
15949 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15951 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15952 tree arg1, arg0;
15953 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15954 enum insn_code icode;
15955 machine_mode tmode, mode0;
15956 rtx pat, op0;
15957 const struct builtin_description *d;
15958 size_t i;
15960 *expandedp = true;
15962 /* Syntax check for a 5-bit unsigned immediate. */
15963 switch (fcode)
15965 case SPE_BUILTIN_EVSTDD:
15966 case SPE_BUILTIN_EVSTDH:
15967 case SPE_BUILTIN_EVSTDW:
15968 case SPE_BUILTIN_EVSTWHE:
15969 case SPE_BUILTIN_EVSTWHO:
15970 case SPE_BUILTIN_EVSTWWE:
15971 case SPE_BUILTIN_EVSTWWO:
15972 arg1 = CALL_EXPR_ARG (exp, 2);
15973 if (TREE_CODE (arg1) != INTEGER_CST
15974 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15976 error ("argument 2 must be a 5-bit unsigned literal");
15977 return const0_rtx;
15979 break;
15980 default:
15981 break;
15984 /* The evsplat*i instructions are not quite generic. */
15985 switch (fcode)
15987 case SPE_BUILTIN_EVSPLATFI:
15988 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15989 exp, target);
15990 case SPE_BUILTIN_EVSPLATI:
15991 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15992 exp, target);
15993 default:
15994 break;
15997 d = bdesc_2arg_spe;
15998 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15999 if (d->code == fcode)
16000 return rs6000_expand_binop_builtin (d->icode, exp, target);
16002 d = bdesc_spe_predicates;
16003 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16004 if (d->code == fcode)
16005 return spe_expand_predicate_builtin (d->icode, exp, target);
16007 d = bdesc_spe_evsel;
16008 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16009 if (d->code == fcode)
16010 return spe_expand_evsel_builtin (d->icode, exp, target);
16012 switch (fcode)
16014 case SPE_BUILTIN_EVSTDDX:
16015 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16016 case SPE_BUILTIN_EVSTDHX:
16017 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16018 case SPE_BUILTIN_EVSTDWX:
16019 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16020 case SPE_BUILTIN_EVSTWHEX:
16021 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16022 case SPE_BUILTIN_EVSTWHOX:
16023 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16024 case SPE_BUILTIN_EVSTWWEX:
16025 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16026 case SPE_BUILTIN_EVSTWWOX:
16027 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16028 case SPE_BUILTIN_EVSTDD:
16029 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16030 case SPE_BUILTIN_EVSTDH:
16031 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16032 case SPE_BUILTIN_EVSTDW:
16033 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16034 case SPE_BUILTIN_EVSTWHE:
16035 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16036 case SPE_BUILTIN_EVSTWHO:
16037 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16038 case SPE_BUILTIN_EVSTWWE:
16039 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16040 case SPE_BUILTIN_EVSTWWO:
16041 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16042 case SPE_BUILTIN_MFSPEFSCR:
16043 icode = CODE_FOR_spe_mfspefscr;
16044 tmode = insn_data[icode].operand[0].mode;
16046 if (target == 0
16047 || GET_MODE (target) != tmode
16048 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16049 target = gen_reg_rtx (tmode);
16051 pat = GEN_FCN (icode) (target);
16052 if (! pat)
16053 return 0;
16054 emit_insn (pat);
16055 return target;
16056 case SPE_BUILTIN_MTSPEFSCR:
16057 icode = CODE_FOR_spe_mtspefscr;
16058 arg0 = CALL_EXPR_ARG (exp, 0);
16059 op0 = expand_normal (arg0);
16060 mode0 = insn_data[icode].operand[0].mode;
16062 if (arg0 == error_mark_node)
16063 return const0_rtx;
16065 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16066 op0 = copy_to_mode_reg (mode0, op0);
16068 pat = GEN_FCN (icode) (op0);
16069 if (pat)
16070 emit_insn (pat);
16071 return NULL_RTX;
16072 default:
16073 break;
16076 *expandedp = false;
16077 return NULL_RTX;
16080 static rtx
16081 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16083 rtx pat, scratch, tmp;
16084 tree form = CALL_EXPR_ARG (exp, 0);
16085 tree arg0 = CALL_EXPR_ARG (exp, 1);
16086 tree arg1 = CALL_EXPR_ARG (exp, 2);
16087 rtx op0 = expand_normal (arg0);
16088 rtx op1 = expand_normal (arg1);
16089 machine_mode mode0 = insn_data[icode].operand[1].mode;
16090 machine_mode mode1 = insn_data[icode].operand[2].mode;
16091 int form_int;
16092 enum rtx_code code;
16094 if (TREE_CODE (form) != INTEGER_CST)
16096 error ("argument 1 of __builtin_paired_predicate must be a constant");
16097 return const0_rtx;
16099 else
16100 form_int = TREE_INT_CST_LOW (form);
16102 gcc_assert (mode0 == mode1);
16104 if (arg0 == error_mark_node || arg1 == error_mark_node)
16105 return const0_rtx;
16107 if (target == 0
16108 || GET_MODE (target) != SImode
16109 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16110 target = gen_reg_rtx (SImode);
16111 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16112 op0 = copy_to_mode_reg (mode0, op0);
16113 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16114 op1 = copy_to_mode_reg (mode1, op1);
16116 scratch = gen_reg_rtx (CCFPmode);
16118 pat = GEN_FCN (icode) (scratch, op0, op1);
16119 if (!pat)
16120 return const0_rtx;
16122 emit_insn (pat);
16124 switch (form_int)
16126 /* LT bit. */
16127 case 0:
16128 code = LT;
16129 break;
16130 /* GT bit. */
16131 case 1:
16132 code = GT;
16133 break;
16134 /* EQ bit. */
16135 case 2:
16136 code = EQ;
16137 break;
16138 /* UN bit. */
16139 case 3:
16140 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16141 return target;
16142 default:
16143 error ("argument 1 of __builtin_paired_predicate is out of range");
16144 return const0_rtx;
16147 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16148 emit_move_insn (target, tmp);
16149 return target;
16152 static rtx
16153 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16155 rtx pat, scratch, tmp;
16156 tree form = CALL_EXPR_ARG (exp, 0);
16157 tree arg0 = CALL_EXPR_ARG (exp, 1);
16158 tree arg1 = CALL_EXPR_ARG (exp, 2);
16159 rtx op0 = expand_normal (arg0);
16160 rtx op1 = expand_normal (arg1);
16161 machine_mode mode0 = insn_data[icode].operand[1].mode;
16162 machine_mode mode1 = insn_data[icode].operand[2].mode;
16163 int form_int;
16164 enum rtx_code code;
16166 if (TREE_CODE (form) != INTEGER_CST)
16168 error ("argument 1 of __builtin_spe_predicate must be a constant");
16169 return const0_rtx;
16171 else
16172 form_int = TREE_INT_CST_LOW (form);
16174 gcc_assert (mode0 == mode1);
16176 if (arg0 == error_mark_node || arg1 == error_mark_node)
16177 return const0_rtx;
16179 if (target == 0
16180 || GET_MODE (target) != SImode
16181 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16182 target = gen_reg_rtx (SImode);
16184 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16185 op0 = copy_to_mode_reg (mode0, op0);
16186 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16187 op1 = copy_to_mode_reg (mode1, op1);
16189 scratch = gen_reg_rtx (CCmode);
16191 pat = GEN_FCN (icode) (scratch, op0, op1);
16192 if (! pat)
16193 return const0_rtx;
16194 emit_insn (pat);
16196 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16197 _lower_. We use one compare, but look in different bits of the
16198 CR for each variant.
16200 There are 2 elements in each SPE simd type (upper/lower). The CR
16201 bits are set as follows:
16203 BIT 0 | BIT 1 | BIT 2 | BIT 3
16204 U | L | (U | L) | (U & L)
16206 So, for an "all" relationship, BIT 3 would be set.
16207 For an "any" relationship, BIT 2 would be set. Etc.
16209 Following traditional nomenclature, these bits map to:
16211 BIT 0 | BIT 1 | BIT 2 | BIT 3
16212 LT | GT | EQ | OV
16214 Later, we will generate rtl to look in the OV/EQ/LT/GT bit for the all/any/upper/lower variant respectively.
16217 switch (form_int)
16219 /* All variant. OV bit. */
16220 case 0:
16221 /* We need to get to the OV bit, which is the ORDERED bit. We
16222 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16223 that's ugly and will make validate_condition_mode die.
16224 So let's just use another pattern. */
16225 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16226 return target;
16227 /* Any variant. EQ bit. */
16228 case 1:
16229 code = EQ;
16230 break;
16231 /* Upper variant. LT bit. */
16232 case 2:
16233 code = LT;
16234 break;
16235 /* Lower variant. GT bit. */
16236 case 3:
16237 code = GT;
16238 break;
16239 default:
16240 error ("argument 1 of __builtin_spe_predicate is out of range");
16241 return const0_rtx;
16244 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16245 emit_move_insn (target, tmp);
16247 return target;
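/* Usage sketch (the predicate name is a placeholder; the real names and
   argument types come from the bdesc_spe_predicates table):

     __ev64_opaque__ a, b;
     int all   = __builtin_spe_PRED (0, a, b);    OV bit: U & L
     int any   = __builtin_spe_PRED (1, a, b);    EQ bit: U | L
     int upper = __builtin_spe_PRED (2, a, b);    LT bit: U
     int lower = __builtin_spe_PRED (3, a, b);    GT bit: L
*/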
16250 /* The evsel builtins look like this:
16252 e = __builtin_spe_evsel_OP (a, b, c, d);
16254 and work like this:
16256 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16257 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16260 static rtx
16261 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16263 rtx pat, scratch;
16264 tree arg0 = CALL_EXPR_ARG (exp, 0);
16265 tree arg1 = CALL_EXPR_ARG (exp, 1);
16266 tree arg2 = CALL_EXPR_ARG (exp, 2);
16267 tree arg3 = CALL_EXPR_ARG (exp, 3);
16268 rtx op0 = expand_normal (arg0);
16269 rtx op1 = expand_normal (arg1);
16270 rtx op2 = expand_normal (arg2);
16271 rtx op3 = expand_normal (arg3);
16272 machine_mode mode0 = insn_data[icode].operand[1].mode;
16273 machine_mode mode1 = insn_data[icode].operand[2].mode;
16275 gcc_assert (mode0 == mode1);
16277 if (arg0 == error_mark_node || arg1 == error_mark_node
16278 || arg2 == error_mark_node || arg3 == error_mark_node)
16279 return const0_rtx;
16281 if (target == 0
16282 || GET_MODE (target) != mode0
16283 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16284 target = gen_reg_rtx (mode0);
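/* All four vector operands share the single mode asserted above, so
   operand 1's predicate can be reused to vet each of them.  */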
16286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16287 op0 = copy_to_mode_reg (mode0, op0);
16288 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16289 op1 = copy_to_mode_reg (mode0, op1);
16290 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16291 op2 = copy_to_mode_reg (mode0, op2);
16292 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16293 op3 = copy_to_mode_reg (mode0, op3);
16295 /* Generate the compare. */
16296 scratch = gen_reg_rtx (CCmode);
16297 pat = GEN_FCN (icode) (scratch, op0, op1);
16298 if (! pat)
16299 return const0_rtx;
16300 emit_insn (pat);
16302 if (mode0 == V2SImode)
16303 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16304 else
16305 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16307 return target;
16310 /* Raise an error message for a builtin function that is called without the
16311 appropriate target options being set. */
16313 static void
16314 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16316 size_t uns_fncode = (size_t)fncode;
16317 const char *name = rs6000_builtin_info[uns_fncode].name;
16318 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16320 gcc_assert (name != NULL);
16321 if ((fnmask & RS6000_BTM_CELL) != 0)
16322 error ("builtin function %s is only valid for the cell processor", name);
16323 else if ((fnmask & RS6000_BTM_VSX) != 0)
16324 error ("builtin function %s requires the -mvsx option", name);
16325 else if ((fnmask & RS6000_BTM_HTM) != 0)
16326 error ("builtin function %s requires the -mhtm option", name);
16327 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16328 error ("builtin function %s requires the -maltivec option", name);
16329 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16330 error ("builtin function %s requires the -mpaired option", name);
16331 else if ((fnmask & RS6000_BTM_SPE) != 0)
16332 error ("builtin function %s requires the -mspe option", name);
16333 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16334 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16335 error ("builtin function %s requires the -mhard-dfp and"
16336 " -mpower8-vector options", name);
16337 else if ((fnmask & RS6000_BTM_DFP) != 0)
16338 error ("builtin function %s requires the -mhard-dfp option", name);
16339 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16340 error ("builtin function %s requires the -mpower8-vector option", name);
16341 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16342 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16343 error ("builtin function %s requires the -mcpu=power9 and"
16344 " -m64 options", name);
16345 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16346 error ("builtin function %s requires the -mcpu=power9 option", name);
16347 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16348 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16349 error ("builtin function %s requires the -mcpu=power9 and"
16350 " -m64 options", name);
16351 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16352 error ("builtin function %s requires the -mcpu=power9 option", name);
16353 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16354 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16355 error ("builtin function %s requires the -mhard-float and"
16356 " -mlong-double-128 options", name);
16357 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16358 error ("builtin function %s requires the -mhard-float option", name);
16359 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16360 error ("builtin function %s requires the -mfloat128 option", name);
16361 else
16362 error ("builtin function %s is not supported with the current options",
16363 name);
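/* For example, compiling a call to a VSX builtin such as
   __builtin_vsx_xvadddp with -mno-vsx reports "builtin function
   __builtin_vsx_xvadddp requires the -mvsx option", after which
   rs6000_expand_builtin falls back to expanding an ordinary call.  */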
16366 /* Target hook for early folding of built-ins, shamelessly stolen
16367 from ia64.c. */
16369 static tree
16370 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16371 tree *args, bool ignore ATTRIBUTE_UNUSED)
16373 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16375 enum rs6000_builtins fn_code
16376 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16377 switch (fn_code)
16379 case RS6000_BUILTIN_NANQ:
16380 case RS6000_BUILTIN_NANSQ:
16382 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16383 const char *str = c_getstr (*args);
16384 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16385 REAL_VALUE_TYPE real;
16387 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16388 return build_real (type, real);
16389 return NULL_TREE;
16391 case RS6000_BUILTIN_INFQ:
16392 case RS6000_BUILTIN_HUGE_VALQ:
16394 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16395 REAL_VALUE_TYPE inf;
16396 real_inf (&inf);
16397 return build_real (type, inf);
16399 default:
16400 break;
16403 #ifdef SUBTARGET_FOLD_BUILTIN
16404 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16405 #else
16406 return NULL_TREE;
16407 #endif
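/* For example, __builtin_nanq ("") has a constant string argument, so
   real_nan succeeds and the call folds to a REAL_CST at compile time;
   with a non-literal argument c_getstr returns NULL and the call is
   left for normal expansion.  */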
16410 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16411 a constant, use rs6000_fold_builtin.) */
16413 bool
16414 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16416 gimple *stmt = gsi_stmt (*gsi);
16417 tree fndecl = gimple_call_fndecl (stmt);
16418 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16419 enum rs6000_builtins fn_code
16420 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16421 tree arg0, arg1, lhs;
16423 switch (fn_code)
16425 /* Flavors of vec_add. We deliberately don't expand
16426 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16427 TImode, resulting in much poorer code generation. */
16428 case ALTIVEC_BUILTIN_VADDUBM:
16429 case ALTIVEC_BUILTIN_VADDUHM:
16430 case ALTIVEC_BUILTIN_VADDUWM:
16431 case P8V_BUILTIN_VADDUDM:
16432 case ALTIVEC_BUILTIN_VADDFP:
16433 case VSX_BUILTIN_XVADDDP:
16435 arg0 = gimple_call_arg (stmt, 0);
16436 arg1 = gimple_call_arg (stmt, 1);
16437 lhs = gimple_call_lhs (stmt);
16438 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16439 gimple_set_location (g, gimple_location (stmt));
16440 gsi_replace (gsi, g, true);
16441 return true;
16443 default:
16444 break;
16447 return false;
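/* For example, a vec_add of two vector ints reaches this point as a
   call to ALTIVEC_BUILTIN_VADDUWM and is rewritten into a plain
   PLUS_EXPR assignment, roughly

     lhs = __builtin_altivec_vadduwm (a, b);   ==>   lhs = a + b;

   which later gimple passes and the vectorizer can reason about.  */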
16450 /* Expand an expression EXP that calls a built-in function,
16451 with result going to TARGET if that's convenient
16452 (and in mode MODE if that's convenient).
16453 SUBTARGET may be used as the target for computing one of EXP's operands.
16454 IGNORE is nonzero if the value is to be ignored. */
16456 static rtx
16457 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16458 machine_mode mode ATTRIBUTE_UNUSED,
16459 int ignore ATTRIBUTE_UNUSED)
16461 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16462 enum rs6000_builtins fcode
16463 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16464 size_t uns_fcode = (size_t)fcode;
16465 const struct builtin_description *d;
16466 size_t i;
16467 rtx ret;
16468 bool success;
16469 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16470 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16472 if (TARGET_DEBUG_BUILTIN)
16474 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16475 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16476 const char *name2 = ((icode != CODE_FOR_nothing)
16477 ? get_insn_name ((int)icode)
16478 : "nothing");
16479 const char *name3;
16481 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16483 default: name3 = "unknown"; break;
16484 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16485 case RS6000_BTC_UNARY: name3 = "unary"; break;
16486 case RS6000_BTC_BINARY: name3 = "binary"; break;
16487 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16488 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16489 case RS6000_BTC_ABS: name3 = "abs"; break;
16490 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16491 case RS6000_BTC_DST: name3 = "dst"; break;
16495 fprintf (stderr,
16496 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16497 (name1) ? name1 : "---", fcode,
16498 (name2) ? name2 : "---", (int)icode,
16499 name3,
16500 func_valid_p ? "" : ", not valid");
16503 if (!func_valid_p)
16505 rs6000_invalid_builtin (fcode);
16507 /* Given it is invalid, just generate a normal call. */
16508 return expand_call (exp, target, ignore);
16511 switch (fcode)
16513 case RS6000_BUILTIN_RECIP:
16514 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16516 case RS6000_BUILTIN_RECIPF:
16517 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16519 case RS6000_BUILTIN_RSQRTF:
16520 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16522 case RS6000_BUILTIN_RSQRT:
16523 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16525 case POWER7_BUILTIN_BPERMD:
16526 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16527 ? CODE_FOR_bpermd_di
16528 : CODE_FOR_bpermd_si), exp, target);
16530 case RS6000_BUILTIN_GET_TB:
16531 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16532 target);
16534 case RS6000_BUILTIN_MFTB:
16535 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16536 ? CODE_FOR_rs6000_mftb_di
16537 : CODE_FOR_rs6000_mftb_si),
16538 target);
16540 case RS6000_BUILTIN_MFFS:
16541 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16543 case RS6000_BUILTIN_MTFSF:
16544 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16546 case RS6000_BUILTIN_CPU_INIT:
16547 case RS6000_BUILTIN_CPU_IS:
16548 case RS6000_BUILTIN_CPU_SUPPORTS:
16549 return cpu_expand_builtin (fcode, exp, target);
16551 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16552 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16554 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16555 : (int) CODE_FOR_altivec_lvsl_direct);
16556 machine_mode tmode = insn_data[icode].operand[0].mode;
16557 machine_mode mode = insn_data[icode].operand[1].mode;
16558 tree arg;
16559 rtx op, addr, pat;
16561 gcc_assert (TARGET_ALTIVEC);
16563 arg = CALL_EXPR_ARG (exp, 0);
16564 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16565 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16566 addr = memory_address (mode, op);
16567 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16568 op = addr;
16569 else
16571 /* For the load case we need to negate the address. */
16572 op = gen_reg_rtx (GET_MODE (addr));
16573 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16575 op = gen_rtx_MEM (mode, op);
16577 if (target == 0
16578 || GET_MODE (target) != tmode
16579 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16580 target = gen_reg_rtx (tmode);
16582 pat = GEN_FCN (icode) (target, op);
16583 if (!pat)
16584 return 0;
16585 emit_insn (pat);
16587 return target;
16590 case ALTIVEC_BUILTIN_VCFUX:
16591 case ALTIVEC_BUILTIN_VCFSX:
16592 case ALTIVEC_BUILTIN_VCTUXS:
16593 case ALTIVEC_BUILTIN_VCTSXS:
16594 /* FIXME: There's got to be a nicer way to handle this case than
16595 constructing a new CALL_EXPR. */
16596 if (call_expr_nargs (exp) == 1)
16598 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16599 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16601 break;
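/* E.g. a VCFSX call reaching this point with a single argument is
   rebuilt with integer_zero_node appended as the scale factor, i.e.
   __builtin_altivec_vcfsx (v) becomes __builtin_altivec_vcfsx (v, 0),
   so the ordinary two-operand expanders below can handle it.  */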
16603 default:
16604 break;
16607 if (TARGET_ALTIVEC)
16609 ret = altivec_expand_builtin (exp, target, &success);
16611 if (success)
16612 return ret;
16614 if (TARGET_SPE)
16616 ret = spe_expand_builtin (exp, target, &success);
16618 if (success)
16619 return ret;
16621 if (TARGET_PAIRED_FLOAT)
16623 ret = paired_expand_builtin (exp, target, &success);
16625 if (success)
16626 return ret;
16628 if (TARGET_HTM)
16630 ret = htm_expand_builtin (exp, target, &success);
16632 if (success)
16633 return ret;
16636 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16637 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16638 gcc_assert (attr == RS6000_BTC_UNARY
16639 || attr == RS6000_BTC_BINARY
16640 || attr == RS6000_BTC_TERNARY
16641 || attr == RS6000_BTC_SPECIAL);
16643 /* Handle simple unary operations. */
16644 d = bdesc_1arg;
16645 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16646 if (d->code == fcode)
16647 return rs6000_expand_unop_builtin (d->icode, exp, target);
16649 /* Handle simple binary operations. */
16650 d = bdesc_2arg;
16651 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16652 if (d->code == fcode)
16653 return rs6000_expand_binop_builtin (d->icode, exp, target);
16655 /* Handle simple ternary operations. */
16656 d = bdesc_3arg;
16657 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16658 if (d->code == fcode)
16659 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16661 /* Handle simple no-argument operations. */
16662 d = bdesc_0arg;
16663 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16664 if (d->code == fcode)
16665 return rs6000_expand_zeroop_builtin (d->icode, target);
16667 gcc_unreachable ();
16670 static void
16671 rs6000_init_builtins (void)
16673 tree tdecl;
16674 tree ftype;
16675 machine_mode mode;
16677 if (TARGET_DEBUG_BUILTIN)
16678 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16679 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16680 (TARGET_SPE) ? ", spe" : "",
16681 (TARGET_ALTIVEC) ? ", altivec" : "",
16682 (TARGET_VSX) ? ", vsx" : "");
16684 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16685 V2SF_type_node = build_vector_type (float_type_node, 2);
16686 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16687 V2DF_type_node = build_vector_type (double_type_node, 2);
16688 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16689 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16690 V4SF_type_node = build_vector_type (float_type_node, 4);
16691 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16692 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16694 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16695 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16696 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16697 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16699 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16700 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16701 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16702 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16704 const_str_type_node
16705 = build_pointer_type (build_qualified_type (char_type_node,
16706 TYPE_QUAL_CONST));
16708 /* We use V1TI mode as a special container to hold __int128_t items that
16709 must live in VSX registers. */
16710 if (intTI_type_node)
16712 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16713 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
16716 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16717 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16718 'vector unsigned short'. */
16720 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16721 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16722 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16723 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16724 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16726 long_integer_type_internal_node = long_integer_type_node;
16727 long_unsigned_type_internal_node = long_unsigned_type_node;
16728 long_long_integer_type_internal_node = long_long_integer_type_node;
16729 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16730 intQI_type_internal_node = intQI_type_node;
16731 uintQI_type_internal_node = unsigned_intQI_type_node;
16732 intHI_type_internal_node = intHI_type_node;
16733 uintHI_type_internal_node = unsigned_intHI_type_node;
16734 intSI_type_internal_node = intSI_type_node;
16735 uintSI_type_internal_node = unsigned_intSI_type_node;
16736 intDI_type_internal_node = intDI_type_node;
16737 uintDI_type_internal_node = unsigned_intDI_type_node;
16738 intTI_type_internal_node = intTI_type_node;
16739 uintTI_type_internal_node = unsigned_intTI_type_node;
16740 float_type_internal_node = float_type_node;
16741 double_type_internal_node = double_type_node;
16742 long_double_type_internal_node = long_double_type_node;
16743 dfloat64_type_internal_node = dfloat64_type_node;
16744 dfloat128_type_internal_node = dfloat128_type_node;
16745 void_type_internal_node = void_type_node;
16747 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16748 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16749 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16750 format that uses a pair of doubles, depending on the switches and
16751 defaults.
16753 We do not enable the actual __float128 keyword unless the user explicitly
16754 asks for it, because the library support is not yet complete.
16756 If we don't have support for either 128-bit IBM double double or IEEE
16757 128-bit floating point, we need to make sure the type is non-zero or
16758 else the self-test fails during bootstrap.
16760 We don't register a built-in type for __ibm128 if the type is the same as
16761 long double. Instead, rs6000_cpu_cpp_builtins adds a #define mapping
16762 __ibm128 to long double. */
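/* Sketch of the resulting source-level spellings (the exact modes
   depend on the long double and -mfloat128 configuration):

     __ibm128    i;    IFmode, IBM double-double, when distinct from
                       long double
     __float128  q;    KFmode, or TFmode when long double is IEEE
     long double l;    TFmode, whichever format the ABI selected
*/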
16763 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16765 ibm128_float_type_node = make_node (REAL_TYPE);
16766 TYPE_PRECISION (ibm128_float_type_node) = 128;
16767 layout_type (ibm128_float_type_node);
16768 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16770 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16771 "__ibm128");
16773 else
16774 ibm128_float_type_node = long_double_type_node;
16776 if (TARGET_FLOAT128_KEYWORD)
16778 ieee128_float_type_node = float128_type_node;
16779 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16780 "__float128");
16783 else if (TARGET_FLOAT128_TYPE)
16785 ieee128_float_type_node = make_node (REAL_TYPE);
16786 TYPE_PRECISION (ieee128_float_type_node) = 128;
16787 layout_type (ieee128_float_type_node);
16788 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16790 /* If we are not exporting the __float128/_Float128 keywords, we need a
16791 keyword to get the types created. Use __ieee128 as the dummy
16792 keyword. */
16793 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16794 "__ieee128");
16797 else
16798 ieee128_float_type_node = long_double_type_node;
16800 /* Initialize the modes for builtin_function_type, mapping each machine
16801 mode to its tree type node. */
16802 builtin_mode_to_type[QImode][0] = integer_type_node;
16803 builtin_mode_to_type[HImode][0] = integer_type_node;
16804 builtin_mode_to_type[SImode][0] = intSI_type_node;
16805 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16806 builtin_mode_to_type[DImode][0] = intDI_type_node;
16807 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16808 builtin_mode_to_type[TImode][0] = intTI_type_node;
16809 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16810 builtin_mode_to_type[SFmode][0] = float_type_node;
16811 builtin_mode_to_type[DFmode][0] = double_type_node;
16812 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16813 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16814 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16815 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16816 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16817 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16818 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16819 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16820 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16821 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16822 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16823 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16824 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16825 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16826 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16827 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16828 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16829 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16830 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16831 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16833 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16834 TYPE_NAME (bool_char_type_node) = tdecl;
16836 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16837 TYPE_NAME (bool_short_type_node) = tdecl;
16839 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16840 TYPE_NAME (bool_int_type_node) = tdecl;
16842 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16843 TYPE_NAME (pixel_type_node) = tdecl;
16845 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16846 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16847 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16848 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16849 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16851 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16852 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16854 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16855 TYPE_NAME (V16QI_type_node) = tdecl;
16857 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
16858 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16860 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16861 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16863 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16864 TYPE_NAME (V8HI_type_node) = tdecl;
16866 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16867 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16869 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16870 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16872 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16873 TYPE_NAME (V4SI_type_node) = tdecl;
16875 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16876 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16878 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16879 TYPE_NAME (V4SF_type_node) = tdecl;
16881 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16882 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16884 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16885 TYPE_NAME (V2DF_type_node) = tdecl;
16887 if (TARGET_POWERPC64)
16889 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16890 TYPE_NAME (V2DI_type_node) = tdecl;
16892 tdecl = add_builtin_type ("__vector unsigned long",
16893 unsigned_V2DI_type_node);
16894 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16896 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16897 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16899 else
16901 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16902 TYPE_NAME (V2DI_type_node) = tdecl;
16904 tdecl = add_builtin_type ("__vector unsigned long long",
16905 unsigned_V2DI_type_node);
16906 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16908 tdecl = add_builtin_type ("__vector __bool long long",
16909 bool_V2DI_type_node);
16910 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16913 if (V1TI_type_node)
16915 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16916 TYPE_NAME (V1TI_type_node) = tdecl;
16918 tdecl = add_builtin_type ("__vector unsigned __int128",
16919 unsigned_V1TI_type_node);
16920 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
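/* With these registered, declarations such as

     __vector __int128          v;
     __vector unsigned __int128 u;

   become available, keeping 128-bit integer values in VSX registers.  */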
16923 /* Paired and SPE builtins are only available if you build a compiler with
16924 the corresponding options, so only create those builtins when the
16925 matching option is enabled. Create AltiVec and VSX builtins on machines
16926 with at least the general purpose extensions (970 and newer) to allow
16927 the use of the target attribute. */
16928 if (TARGET_PAIRED_FLOAT)
16929 paired_init_builtins ();
16930 if (TARGET_SPE)
16931 spe_init_builtins ();
16932 if (TARGET_EXTRA_BUILTINS)
16933 altivec_init_builtins ();
16934 if (TARGET_HTM)
16935 htm_init_builtins ();
16937 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16938 rs6000_common_init_builtins ();
16940 ftype = build_function_type_list (ieee128_float_type_node,
16941 const_str_type_node, NULL_TREE);
16942 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16943 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16945 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16946 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16947 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16949 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16950 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16951 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16953 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16954 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16955 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16957 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16958 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16959 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16961 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16962 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16963 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16965 mode = (TARGET_64BIT) ? DImode : SImode;
16966 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16967 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16968 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16970 ftype = build_function_type_list (unsigned_intDI_type_node,
16971 NULL_TREE);
16972 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16974 if (TARGET_64BIT)
16975 ftype = build_function_type_list (unsigned_intDI_type_node,
16976 NULL_TREE);
16977 else
16978 ftype = build_function_type_list (unsigned_intSI_type_node,
16979 NULL_TREE);
16980 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16982 ftype = build_function_type_list (double_type_node, NULL_TREE);
16983 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16985 ftype = build_function_type_list (void_type_node,
16986 intSI_type_node, double_type_node,
16987 NULL_TREE);
16988 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16990 ftype = build_function_type_list (void_type_node, NULL_TREE);
16991 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16993 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16994 NULL_TREE);
16995 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16996 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
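/* Typical use of the cpu builtins (sketch; use_power9_path and
   use_vsx_path stand in for user code):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9"))
       use_power9_path ();
     else if (__builtin_cpu_supports ("vsx"))
       use_vsx_path ();
*/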
16998 /* AIX libm provides clog as __clog. */
16999 if (TARGET_XCOFF
17000 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17001 set_user_assembler_name (tdecl, "__clog");
17003 #ifdef SUBTARGET_INIT_BUILTINS
17004 SUBTARGET_INIT_BUILTINS;
17005 #endif
17008 /* Returns the rs6000 builtin decl for CODE. */
17010 static tree
17011 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17013 HOST_WIDE_INT fnmask;
17015 if (code >= RS6000_BUILTIN_COUNT)
17016 return error_mark_node;
17018 fnmask = rs6000_builtin_info[code].mask;
17019 if ((fnmask & rs6000_builtin_mask) != fnmask)
17021 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17022 return error_mark_node;
17025 return rs6000_builtin_decls[code];
17028 static void
17029 spe_init_builtins (void)
17031 tree puint_type_node = build_pointer_type (unsigned_type_node);
17032 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
17033 const struct builtin_description *d;
17034 size_t i;
17035 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17037 tree v2si_ftype_4_v2si
17038 = build_function_type_list (opaque_V2SI_type_node,
17039 opaque_V2SI_type_node,
17040 opaque_V2SI_type_node,
17041 opaque_V2SI_type_node,
17042 opaque_V2SI_type_node,
17043 NULL_TREE);
17045 tree v2sf_ftype_4_v2sf
17046 = build_function_type_list (opaque_V2SF_type_node,
17047 opaque_V2SF_type_node,
17048 opaque_V2SF_type_node,
17049 opaque_V2SF_type_node,
17050 opaque_V2SF_type_node,
17051 NULL_TREE);
17053 tree int_ftype_int_v2si_v2si
17054 = build_function_type_list (integer_type_node,
17055 integer_type_node,
17056 opaque_V2SI_type_node,
17057 opaque_V2SI_type_node,
17058 NULL_TREE);
17060 tree int_ftype_int_v2sf_v2sf
17061 = build_function_type_list (integer_type_node,
17062 integer_type_node,
17063 opaque_V2SF_type_node,
17064 opaque_V2SF_type_node,
17065 NULL_TREE);
17067 tree void_ftype_v2si_puint_int
17068 = build_function_type_list (void_type_node,
17069 opaque_V2SI_type_node,
17070 puint_type_node,
17071 integer_type_node,
17072 NULL_TREE);
17074 tree void_ftype_v2si_puint_char
17075 = build_function_type_list (void_type_node,
17076 opaque_V2SI_type_node,
17077 puint_type_node,
17078 char_type_node,
17079 NULL_TREE);
17081 tree void_ftype_v2si_pv2si_int
17082 = build_function_type_list (void_type_node,
17083 opaque_V2SI_type_node,
17084 opaque_p_V2SI_type_node,
17085 integer_type_node,
17086 NULL_TREE);
17088 tree void_ftype_v2si_pv2si_char
17089 = build_function_type_list (void_type_node,
17090 opaque_V2SI_type_node,
17091 opaque_p_V2SI_type_node,
17092 char_type_node,
17093 NULL_TREE);
17095 tree void_ftype_int
17096 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17098 tree int_ftype_void
17099 = build_function_type_list (integer_type_node, NULL_TREE);
17101 tree v2si_ftype_pv2si_int
17102 = build_function_type_list (opaque_V2SI_type_node,
17103 opaque_p_V2SI_type_node,
17104 integer_type_node,
17105 NULL_TREE);
17107 tree v2si_ftype_puint_int
17108 = build_function_type_list (opaque_V2SI_type_node,
17109 puint_type_node,
17110 integer_type_node,
17111 NULL_TREE);
17113 tree v2si_ftype_pushort_int
17114 = build_function_type_list (opaque_V2SI_type_node,
17115 pushort_type_node,
17116 integer_type_node,
17117 NULL_TREE);
17119 tree v2si_ftype_signed_char
17120 = build_function_type_list (opaque_V2SI_type_node,
17121 signed_char_type_node,
17122 NULL_TREE);
17124 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17126 /* Initialize irregular SPE builtins. */
17128 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17129 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17130 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17131 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17132 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17133 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17134 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17135 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17136 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17137 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17138 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
17139 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
17140 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
17141 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
17142 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
17143 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
17144 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
17145 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
17147 /* Loads. */
17148 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
17149 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
17150 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
17151 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
17152 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
17153 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
17154 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
17155 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17156 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17157 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17158 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17159 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17160 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17161 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17162 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17163 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17164 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17165 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17166 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17167 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17168 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17169 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
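/* Usage sketch for one of the loads defined above:

     __ev64_opaque__ *p;
     __ev64_opaque__ v = __builtin_spe_evldd (p, 8);

   where the second operand is a small constant offset whose scaling
   follows the instruction's encoding.  */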
17171 /* Predicates. */
17172 d = bdesc_spe_predicates;
17173 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17175 tree type;
17176 HOST_WIDE_INT mask = d->mask;
17178 if ((mask & builtin_mask) != mask)
17180 if (TARGET_DEBUG_BUILTIN)
17181 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
17182 d->name);
17183 continue;
17186 switch (insn_data[d->icode].operand[1].mode)
17188 case V2SImode:
17189 type = int_ftype_int_v2si_v2si;
17190 break;
17191 case V2SFmode:
17192 type = int_ftype_int_v2sf_v2sf;
17193 break;
17194 default:
17195 gcc_unreachable ();
17198 def_builtin (d->name, type, d->code);
17201 /* Evsel predicates. */
17202 d = bdesc_spe_evsel;
17203 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17205 tree type;
17206 HOST_WIDE_INT mask = d->mask;
17208 if ((mask & builtin_mask) != mask)
17210 if (TARGET_DEBUG_BUILTIN)
17211 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
17212 d->name);
17213 continue;
17216 switch (insn_data[d->icode].operand[1].mode)
17218 case V2SImode:
17219 type = v2si_ftype_4_v2si;
17220 break;
17221 case V2SFmode:
17222 type = v2sf_ftype_4_v2sf;
17223 break;
17224 default:
17225 gcc_unreachable ();
17228 def_builtin (d->name, type, d->code);
17232 static void
17233 paired_init_builtins (void)
17235 const struct builtin_description *d;
17236 size_t i;
17237 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17239 tree int_ftype_int_v2sf_v2sf
17240 = build_function_type_list (integer_type_node,
17241 integer_type_node,
17242 V2SF_type_node,
17243 V2SF_type_node,
17244 NULL_TREE);
17245 tree pcfloat_type_node
17246 = build_pointer_type (build_qualified_type
17247 (float_type_node, TYPE_QUAL_CONST));
17249 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17250 long_integer_type_node,
17251 pcfloat_type_node,
17252 NULL_TREE);
17253 tree void_ftype_v2sf_long_pcfloat
17254 = build_function_type_list (void_type_node,
17255 V2SF_type_node,
17256 long_integer_type_node,
17257 pcfloat_type_node,
17258 NULL_TREE);
17261 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17262 PAIRED_BUILTIN_LX);
17265 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17266 PAIRED_BUILTIN_STX);
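/* Usage sketch (v2sf stands for the two-float vector type):

     v2sf v = __builtin_paired_lx (0, src);
     __builtin_paired_stx (v, 0, dst);

   with src/dst float pointers and the leading 0 the long offset
   operand from the signatures above.  */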
17268 /* Predicates. */
17269 d = bdesc_paired_preds;
17270 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17272 tree type;
17273 HOST_WIDE_INT mask = d->mask;
17275 if ((mask & builtin_mask) != mask)
17277 if (TARGET_DEBUG_BUILTIN)
17278 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17279 d->name);
17280 continue;
17283 if (TARGET_DEBUG_BUILTIN)
17284 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17285 (int)i, get_insn_name (d->icode), (int)d->icode,
17286 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17288 switch (insn_data[d->icode].operand[1].mode)
17290 case V2SFmode:
17291 type = int_ftype_int_v2sf_v2sf;
17292 break;
17293 default:
17294 gcc_unreachable ();
17297 def_builtin (d->name, type, d->code);
17301 static void
17302 altivec_init_builtins (void)
17304 const struct builtin_description *d;
17305 size_t i;
17306 tree ftype;
17307 tree decl;
17308 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17310 tree pvoid_type_node = build_pointer_type (void_type_node);
17312 tree pcvoid_type_node
17313 = build_pointer_type (build_qualified_type (void_type_node,
17314 TYPE_QUAL_CONST));
17316 tree int_ftype_opaque
17317 = build_function_type_list (integer_type_node,
17318 opaque_V4SI_type_node, NULL_TREE);
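/* Note: vec_splats and vec_promote are overloaded and resolved in the
   C front end, so the opaque_ftype_opaque signature recorded for them
   below is effectively a placeholder.  */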
17319 tree opaque_ftype_opaque
17320 = build_function_type_list (integer_type_node, NULL_TREE);
17321 tree opaque_ftype_opaque_int
17322 = build_function_type_list (opaque_V4SI_type_node,
17323 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17324 tree opaque_ftype_opaque_opaque_int
17325 = build_function_type_list (opaque_V4SI_type_node,
17326 opaque_V4SI_type_node, opaque_V4SI_type_node,
17327 integer_type_node, NULL_TREE);
17328 tree opaque_ftype_opaque_opaque_opaque
17329 = build_function_type_list (opaque_V4SI_type_node,
17330 opaque_V4SI_type_node, opaque_V4SI_type_node,
17331 opaque_V4SI_type_node, NULL_TREE);
17332 tree opaque_ftype_opaque_opaque
17333 = build_function_type_list (opaque_V4SI_type_node,
17334 opaque_V4SI_type_node, opaque_V4SI_type_node,
17335 NULL_TREE);
17336 tree int_ftype_int_opaque_opaque
17337 = build_function_type_list (integer_type_node,
17338 integer_type_node, opaque_V4SI_type_node,
17339 opaque_V4SI_type_node, NULL_TREE);
17340 tree int_ftype_int_v4si_v4si
17341 = build_function_type_list (integer_type_node,
17342 integer_type_node, V4SI_type_node,
17343 V4SI_type_node, NULL_TREE);
17344 tree int_ftype_int_v2di_v2di
17345 = build_function_type_list (integer_type_node,
17346 integer_type_node, V2DI_type_node,
17347 V2DI_type_node, NULL_TREE);
17348 tree void_ftype_v4si
17349 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17350 tree v8hi_ftype_void
17351 = build_function_type_list (V8HI_type_node, NULL_TREE);
17352 tree void_ftype_void
17353 = build_function_type_list (void_type_node, NULL_TREE);
17354 tree void_ftype_int
17355 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17357 tree opaque_ftype_long_pcvoid
17358 = build_function_type_list (opaque_V4SI_type_node,
17359 long_integer_type_node, pcvoid_type_node,
17360 NULL_TREE);
17361 tree v16qi_ftype_long_pcvoid
17362 = build_function_type_list (V16QI_type_node,
17363 long_integer_type_node, pcvoid_type_node,
17364 NULL_TREE);
17365 tree v8hi_ftype_long_pcvoid
17366 = build_function_type_list (V8HI_type_node,
17367 long_integer_type_node, pcvoid_type_node,
17368 NULL_TREE);
17369 tree v4si_ftype_long_pcvoid
17370 = build_function_type_list (V4SI_type_node,
17371 long_integer_type_node, pcvoid_type_node,
17372 NULL_TREE);
17373 tree v4sf_ftype_long_pcvoid
17374 = build_function_type_list (V4SF_type_node,
17375 long_integer_type_node, pcvoid_type_node,
17376 NULL_TREE);
17377 tree v2df_ftype_long_pcvoid
17378 = build_function_type_list (V2DF_type_node,
17379 long_integer_type_node, pcvoid_type_node,
17380 NULL_TREE);
17381 tree v2di_ftype_long_pcvoid
17382 = build_function_type_list (V2DI_type_node,
17383 long_integer_type_node, pcvoid_type_node,
17384 NULL_TREE);
17386 tree void_ftype_opaque_long_pvoid
17387 = build_function_type_list (void_type_node,
17388 opaque_V4SI_type_node, long_integer_type_node,
17389 pvoid_type_node, NULL_TREE);
17390 tree void_ftype_v4si_long_pvoid
17391 = build_function_type_list (void_type_node,
17392 V4SI_type_node, long_integer_type_node,
17393 pvoid_type_node, NULL_TREE);
17394 tree void_ftype_v16qi_long_pvoid
17395 = build_function_type_list (void_type_node,
17396 V16QI_type_node, long_integer_type_node,
17397 pvoid_type_node, NULL_TREE);
17399 tree void_ftype_v16qi_pvoid_long
17400 = build_function_type_list (void_type_node,
17401 V16QI_type_node, pvoid_type_node,
17402 long_integer_type_node, NULL_TREE);
17404 tree void_ftype_v8hi_long_pvoid
17405 = build_function_type_list (void_type_node,
17406 V8HI_type_node, long_integer_type_node,
17407 pvoid_type_node, NULL_TREE);
17408 tree void_ftype_v4sf_long_pvoid
17409 = build_function_type_list (void_type_node,
17410 V4SF_type_node, long_integer_type_node,
17411 pvoid_type_node, NULL_TREE);
17412 tree void_ftype_v2df_long_pvoid
17413 = build_function_type_list (void_type_node,
17414 V2DF_type_node, long_integer_type_node,
17415 pvoid_type_node, NULL_TREE);
17416 tree void_ftype_v2di_long_pvoid
17417 = build_function_type_list (void_type_node,
17418 V2DI_type_node, long_integer_type_node,
17419 pvoid_type_node, NULL_TREE);
17420 tree int_ftype_int_v8hi_v8hi
17421 = build_function_type_list (integer_type_node,
17422 integer_type_node, V8HI_type_node,
17423 V8HI_type_node, NULL_TREE);
17424 tree int_ftype_int_v16qi_v16qi
17425 = build_function_type_list (integer_type_node,
17426 integer_type_node, V16QI_type_node,
17427 V16QI_type_node, NULL_TREE);
17428 tree int_ftype_int_v4sf_v4sf
17429 = build_function_type_list (integer_type_node,
17430 integer_type_node, V4SF_type_node,
17431 V4SF_type_node, NULL_TREE);
17432 tree int_ftype_int_v2df_v2df
17433 = build_function_type_list (integer_type_node,
17434 integer_type_node, V2DF_type_node,
17435 V2DF_type_node, NULL_TREE);
17436 tree v2di_ftype_v2di
17437 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17438 tree v4si_ftype_v4si
17439 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17440 tree v8hi_ftype_v8hi
17441 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17442 tree v16qi_ftype_v16qi
17443 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17444 tree v4sf_ftype_v4sf
17445 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17446 tree v2df_ftype_v2df
17447 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17448 tree void_ftype_pcvoid_int_int
17449 = build_function_type_list (void_type_node,
17450 pcvoid_type_node, integer_type_node,
17451 integer_type_node, NULL_TREE);
17453 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17454 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17455 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17456 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17457 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17458 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17459 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17460 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17461 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17462 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17463 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17464 ALTIVEC_BUILTIN_LVXL_V2DF);
17465 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17466 ALTIVEC_BUILTIN_LVXL_V2DI);
17467 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17468 ALTIVEC_BUILTIN_LVXL_V4SF);
17469 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17470 ALTIVEC_BUILTIN_LVXL_V4SI);
17471 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17472 ALTIVEC_BUILTIN_LVXL_V8HI);
17473 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17474 ALTIVEC_BUILTIN_LVXL_V16QI);
17475 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17476 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17477 ALTIVEC_BUILTIN_LVX_V2DF);
17478 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17479 ALTIVEC_BUILTIN_LVX_V2DI);
17480 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17481 ALTIVEC_BUILTIN_LVX_V4SF);
17482 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17483 ALTIVEC_BUILTIN_LVX_V4SI);
17484 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17485 ALTIVEC_BUILTIN_LVX_V8HI);
17486 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17487 ALTIVEC_BUILTIN_LVX_V16QI);
17488 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17489 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17490 ALTIVEC_BUILTIN_STVX_V2DF);
17491 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17492 ALTIVEC_BUILTIN_STVX_V2DI);
17493 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17494 ALTIVEC_BUILTIN_STVX_V4SF);
17495 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17496 ALTIVEC_BUILTIN_STVX_V4SI);
17497 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17498 ALTIVEC_BUILTIN_STVX_V8HI);
17499 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17500 ALTIVEC_BUILTIN_STVX_V16QI);
17501 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17502 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17503 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17504 ALTIVEC_BUILTIN_STVXL_V2DF);
17505 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17506 ALTIVEC_BUILTIN_STVXL_V2DI);
17507 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17508 ALTIVEC_BUILTIN_STVXL_V4SF);
17509 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17510 ALTIVEC_BUILTIN_STVXL_V4SI);
17511 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17512 ALTIVEC_BUILTIN_STVXL_V8HI);
17513 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17514 ALTIVEC_BUILTIN_STVXL_V16QI);
17515 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17516 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17517 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17518 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17519 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17520 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17521 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17522 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17523 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17524 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17525 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17526 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17527 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17528 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17529 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17530 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17532 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17533 VSX_BUILTIN_LXVD2X_V2DF);
17534 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17535 VSX_BUILTIN_LXVD2X_V2DI);
17536 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17537 VSX_BUILTIN_LXVW4X_V4SF);
17538 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17539 VSX_BUILTIN_LXVW4X_V4SI);
17540 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17541 VSX_BUILTIN_LXVW4X_V8HI);
17542 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17543 VSX_BUILTIN_LXVW4X_V16QI);
17544 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17545 VSX_BUILTIN_STXVD2X_V2DF);
17546 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17547 VSX_BUILTIN_STXVD2X_V2DI);
17548 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17549 VSX_BUILTIN_STXVW4X_V4SF);
17550 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17551 VSX_BUILTIN_STXVW4X_V4SI);
17552 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17553 VSX_BUILTIN_STXVW4X_V8HI);
17554 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17555 VSX_BUILTIN_STXVW4X_V16QI);
17557 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17558 VSX_BUILTIN_LD_ELEMREV_V2DF);
17559 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17560 VSX_BUILTIN_LD_ELEMREV_V2DI);
17561 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17562 VSX_BUILTIN_LD_ELEMREV_V4SF);
17563 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17564 VSX_BUILTIN_LD_ELEMREV_V4SI);
17565 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17566 VSX_BUILTIN_ST_ELEMREV_V2DF);
17567 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17568 VSX_BUILTIN_ST_ELEMREV_V2DI);
17569 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17570 VSX_BUILTIN_ST_ELEMREV_V4SF);
17571 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17572 VSX_BUILTIN_ST_ELEMREV_V4SI);
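/* The *_elemrev builtins wrap element-reversing VSX loads and stores,
   which keep vec_xl/vec_xst element ordering correct on little-endian
   targets.  */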
17574 if (TARGET_P9_VECTOR)
17576 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17577 VSX_BUILTIN_LD_ELEMREV_V8HI);
17578 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17579 VSX_BUILTIN_LD_ELEMREV_V16QI);
17580 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17581 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17582 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17583 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17586 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17587 VSX_BUILTIN_VEC_LD);
17588 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17589 VSX_BUILTIN_VEC_ST);
17590 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17591 VSX_BUILTIN_VEC_XL);
17592 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17593 VSX_BUILTIN_VEC_XST);
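/* Usage sketch for the overloaded entry points just defined:

     vector int load4 (const int *p)    { return vec_xl (0, p); }
     void store4 (vector int v, int *p) { vec_xst (v, 0, p); }

   with the leading 0 a byte offset added to the pointer.  */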
17595 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17596 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17597 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17599 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17600 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17601 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17602 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17603 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17604 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17605 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17606 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17607 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17608 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17609 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17610 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17612 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17613 ALTIVEC_BUILTIN_VEC_ADDE);
17614 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17615 ALTIVEC_BUILTIN_VEC_ADDEC);
17616 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17617 ALTIVEC_BUILTIN_VEC_CMPNE);
17618 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17619 ALTIVEC_BUILTIN_VEC_MUL);
17621 /* Cell builtins. */
17622 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17623 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17624 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17625 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17627 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17628 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17629 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17630 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17632 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17633 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17634 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17635 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17637 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17638 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17639 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17640 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17642 if (TARGET_P9_VECTOR)
17643 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17644 P9V_BUILTIN_STXVL);
17646 /* Add the DST variants. */
17647 d = bdesc_dst;
17648 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17650 HOST_WIDE_INT mask = d->mask;
17652 if ((mask & builtin_mask) != mask)
17654 if (TARGET_DEBUG_BUILTIN)
17655 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17656 d->name);
17657 continue;
17659 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17662 /* Initialize the predicates. */
17663 d = bdesc_altivec_preds;
17664 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17666 machine_mode mode1;
17667 tree type;
17668 HOST_WIDE_INT mask = d->mask;
17670 if ((mask & builtin_mask) != mask)
17672 if (TARGET_DEBUG_BUILTIN)
17673 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17674 d->name);
17675 continue;
17678 if (rs6000_overloaded_builtin_p (d->code))
17679 mode1 = VOIDmode;
17680 else
17681 mode1 = insn_data[d->icode].operand[1].mode;
17683 switch (mode1)
17685 case VOIDmode:
17686 type = int_ftype_int_opaque_opaque;
17687 break;
17688 case V2DImode:
17689 type = int_ftype_int_v2di_v2di;
17690 break;
17691 case V4SImode:
17692 type = int_ftype_int_v4si_v4si;
17693 break;
17694 case V8HImode:
17695 type = int_ftype_int_v8hi_v8hi;
17696 break;
17697 case V16QImode:
17698 type = int_ftype_int_v16qi_v16qi;
17699 break;
17700 case V4SFmode:
17701 type = int_ftype_int_v4sf_v4sf;
17702 break;
17703 case V2DFmode:
17704 type = int_ftype_int_v2df_v2df;
17705 break;
17706 default:
17707 gcc_unreachable ();
17710 def_builtin (d->name, type, d->code);
17713 /* Initialize the abs* operators. */
17714 d = bdesc_abs;
17715 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17717 machine_mode mode0;
17718 tree type;
17719 HOST_WIDE_INT mask = d->mask;
17721 if ((mask & builtin_mask) != mask)
17723 if (TARGET_DEBUG_BUILTIN)
17724 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17725 d->name);
17726 continue;
17729 mode0 = insn_data[d->icode].operand[0].mode;
17731 switch (mode0)
17733 case V2DImode:
17734 type = v2di_ftype_v2di;
17735 break;
17736 case V4SImode:
17737 type = v4si_ftype_v4si;
17738 break;
17739 case V8HImode:
17740 type = v8hi_ftype_v8hi;
17741 break;
17742 case V16QImode:
17743 type = v16qi_ftype_v16qi;
17744 break;
17745 case V4SFmode:
17746 type = v4sf_ftype_v4sf;
17747 break;
17748 case V2DFmode:
17749 type = v2df_ftype_v2df;
17750 break;
17751 default:
17752 gcc_unreachable ();
17755 def_builtin (d->name, type, d->code);
17758 /* Initialize target builtin that implements
17759 targetm.vectorize.builtin_mask_for_load. */
17761 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17762 v16qi_ftype_long_pcvoid,
17763 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17764 BUILT_IN_MD, NULL, NULL_TREE);
17765 TREE_READONLY (decl) = 1;
17766 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17767 altivec_builtin_mask_for_load = decl;
17769 /* Access to the vec_init patterns. */
17770 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17771 integer_type_node, integer_type_node,
17772 integer_type_node, NULL_TREE);
17773 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17775 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17776 short_integer_type_node,
17777 short_integer_type_node,
17778 short_integer_type_node,
17779 short_integer_type_node,
17780 short_integer_type_node,
17781 short_integer_type_node,
17782 short_integer_type_node, NULL_TREE);
17783 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17785 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17786 char_type_node, char_type_node,
17787 char_type_node, char_type_node,
17788 char_type_node, char_type_node,
17789 char_type_node, char_type_node,
17790 char_type_node, char_type_node,
17791 char_type_node, char_type_node,
17792 char_type_node, char_type_node,
17793 char_type_node, NULL_TREE);
17794 def_builtin ("__builtin_vec_init_v16qi", ftype,
17795 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17797 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17798 float_type_node, float_type_node,
17799 float_type_node, NULL_TREE);
17800 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17802 /* VSX builtins. */
17803 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17804 double_type_node, NULL_TREE);
17805 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17807 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17808 intDI_type_node, NULL_TREE);
17809 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17811 /* Access to the vec_set patterns. */
17812 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17813 intSI_type_node,
17814 integer_type_node, NULL_TREE);
17815 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17817 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17818 intHI_type_node,
17819 integer_type_node, NULL_TREE);
17820 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17822 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17823 intQI_type_node,
17824 integer_type_node, NULL_TREE);
17825 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17827 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17828 float_type_node,
17829 integer_type_node, NULL_TREE);
17830 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17832 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17833 double_type_node,
17834 integer_type_node, NULL_TREE);
17835 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17837 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17838 intDI_type_node,
17839 integer_type_node, NULL_TREE);
17840 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17842 /* Access to the vec_extract patterns. */
17843 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17844 integer_type_node, NULL_TREE);
17845 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17847 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17848 integer_type_node, NULL_TREE);
17849 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17851 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17852 integer_type_node, NULL_TREE);
17853 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17855 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17856 integer_type_node, NULL_TREE);
17857 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17859 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17860 integer_type_node, NULL_TREE);
17861 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17863 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17864 integer_type_node, NULL_TREE);
17865 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17868 if (V1TI_type_node)
17870 tree v1ti_ftype_long_pcvoid
17871 = build_function_type_list (V1TI_type_node,
17872 long_integer_type_node, pcvoid_type_node,
17873 NULL_TREE);
17874 tree void_ftype_v1ti_long_pvoid
17875 = build_function_type_list (void_type_node,
17876 V1TI_type_node, long_integer_type_node,
17877 pvoid_type_node, NULL_TREE);
17878 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17879 VSX_BUILTIN_LXVD2X_V1TI);
17880 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17881 VSX_BUILTIN_STXVD2X_V1TI);
17882 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17883 NULL_TREE, NULL_TREE);
17884 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17885 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17886 intTI_type_node,
17887 integer_type_node, NULL_TREE);
17888 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17889 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17890 integer_type_node, NULL_TREE);
17891 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17896 static void
17897 htm_init_builtins (void)
17899 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17900 const struct builtin_description *d;
17901 size_t i;
17903 d = bdesc_htm;
17904 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17906 tree op[MAX_HTM_OPERANDS], type;
17907 HOST_WIDE_INT mask = d->mask;
17908 unsigned attr = rs6000_builtin_info[d->code].attr;
17909 bool void_func = (attr & RS6000_BTC_VOID);
17910 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17911 int nopnds = 0;
17912 tree gpr_type_node;
17913 tree rettype;
17914 tree argtype;
17916 if (TARGET_32BIT && TARGET_POWERPC64)
17917 gpr_type_node = long_long_unsigned_type_node;
17918 else
17919 gpr_type_node = long_unsigned_type_node;
17921 if (attr & RS6000_BTC_SPR)
17923 rettype = gpr_type_node;
17924 argtype = gpr_type_node;
17926 else if (d->code == HTM_BUILTIN_TABORTDC
17927 || d->code == HTM_BUILTIN_TABORTDCI)
17929 rettype = unsigned_type_node;
17930 argtype = gpr_type_node;
17932 else
17934 rettype = unsigned_type_node;
17935 argtype = unsigned_type_node;
17938 if ((mask & builtin_mask) != mask)
17940 if (TARGET_DEBUG_BUILTIN)
17941 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
17942 continue;
17945 if (d->name == 0)
17947 if (TARGET_DEBUG_BUILTIN)
17948 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17949 (long unsigned) i);
17950 continue;
17953 op[nopnds++] = (void_func) ? void_type_node : rettype;
17955 if (attr_args == RS6000_BTC_UNARY)
17956 op[nopnds++] = argtype;
17957 else if (attr_args == RS6000_BTC_BINARY)
17959 op[nopnds++] = argtype;
17960 op[nopnds++] = argtype;
17962 else if (attr_args == RS6000_BTC_TERNARY)
17964 op[nopnds++] = argtype;
17965 op[nopnds++] = argtype;
17966 op[nopnds++] = argtype;
17969 switch (nopnds)
17971 case 1:
17972 type = build_function_type_list (op[0], NULL_TREE);
17973 break;
17974 case 2:
17975 type = build_function_type_list (op[0], op[1], NULL_TREE);
17976 break;
17977 case 3:
17978 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17979 break;
17980 case 4:
17981 type = build_function_type_list (op[0], op[1], op[2], op[3],
17982 NULL_TREE);
17983 break;
17984 default:
17985 gcc_unreachable ();
17988 def_builtin (d->name, type, d->code);
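/* Illustrative note (added; not in the original source): for an SPR
   accessor such as __builtin_get_texasr, which has RS6000_BTC_SPR set,
   the loop above is assumed to build

     unsigned long long __builtin_get_texasr (void);   /* -m32 -mpowerpc64 */

   versus "unsigned long" on a plain 32-bit or 64-bit compile, because
   gpr_type_node tracks the width of the hardware GPRs rather than the
   ABI word size.  */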
17992 /* Hash function for builtin functions with up to 3 arguments and a return
17993 type. */
17994 hashval_t
17995 builtin_hasher::hash (builtin_hash_struct *bh)
17997 unsigned ret = 0;
17998 int i;
18000 for (i = 0; i < 4; i++)
18002 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
18003 ret = (ret * 2) + bh->uns_p[i];
18006 return ret;
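/* Worked example (added for illustration): the loop above folds the four
   (mode, uns_p) pairs into one value as digits of a mixed-radix number:

     ret = (((mode[0] * 2 + uns_p[0]) * MAX_MACHINE_MODE + mode[1]) * 2
            + uns_p[1]) ...

   so, modulo wraparound of the unsigned accumulator, two entries hash
   equal only when every mode and signedness flag matches -- the same
   condition builtin_hasher::equal tests below.  */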
18009 /* Compare builtin hash entries H1 and H2 for equivalence. */
18010 bool
18011 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
18013 return ((p1->mode[0] == p2->mode[0])
18014 && (p1->mode[1] == p2->mode[1])
18015 && (p1->mode[2] == p2->mode[2])
18016 && (p1->mode[3] == p2->mode[3])
18017 && (p1->uns_p[0] == p2->uns_p[0])
18018 && (p1->uns_p[1] == p2->uns_p[1])
18019 && (p1->uns_p[2] == p2->uns_p[2])
18020 && (p1->uns_p[3] == p2->uns_p[3]));
18023 /* Map types for builtin functions with an explicit return type and up to 3
18024 arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
18025 of each unused argument.  */
18026 static tree
18027 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
18028 machine_mode mode_arg1, machine_mode mode_arg2,
18029 enum rs6000_builtins builtin, const char *name)
18031 struct builtin_hash_struct h;
18032 struct builtin_hash_struct *h2;
18033 int num_args = 3;
18034 int i;
18035 tree ret_type = NULL_TREE;
18036 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18038 /* Create builtin_hash_table. */
18039 if (builtin_hash_table == NULL)
18040 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18042 h.type = NULL_TREE;
18043 h.mode[0] = mode_ret;
18044 h.mode[1] = mode_arg0;
18045 h.mode[2] = mode_arg1;
18046 h.mode[3] = mode_arg2;
18047 h.uns_p[0] = 0;
18048 h.uns_p[1] = 0;
18049 h.uns_p[2] = 0;
18050 h.uns_p[3] = 0;
18052 /* If the builtin produces unsigned results or takes unsigned
18053 arguments, and it is returned as a decl for the vectorizer (such as
18054 widening multiplies, permute), make sure the arguments and return value
18055 are type correct. */
18056 switch (builtin)
18058 /* unsigned 1 argument functions. */
18059 case CRYPTO_BUILTIN_VSBOX:
18060 case P8V_BUILTIN_VGBBD:
18061 case MISC_BUILTIN_CDTBCD:
18062 case MISC_BUILTIN_CBCDTD:
18063 h.uns_p[0] = 1;
18064 h.uns_p[1] = 1;
18065 break;
18067 /* unsigned 2 argument functions. */
18068 case ALTIVEC_BUILTIN_VMULEUB_UNS:
18069 case ALTIVEC_BUILTIN_VMULEUH_UNS:
18070 case ALTIVEC_BUILTIN_VMULOUB_UNS:
18071 case ALTIVEC_BUILTIN_VMULOUH_UNS:
18072 case CRYPTO_BUILTIN_VCIPHER:
18073 case CRYPTO_BUILTIN_VCIPHERLAST:
18074 case CRYPTO_BUILTIN_VNCIPHER:
18075 case CRYPTO_BUILTIN_VNCIPHERLAST:
18076 case CRYPTO_BUILTIN_VPMSUMB:
18077 case CRYPTO_BUILTIN_VPMSUMH:
18078 case CRYPTO_BUILTIN_VPMSUMW:
18079 case CRYPTO_BUILTIN_VPMSUMD:
18080 case CRYPTO_BUILTIN_VPMSUM:
18081 case MISC_BUILTIN_ADDG6S:
18082 case MISC_BUILTIN_DIVWEU:
18083 case MISC_BUILTIN_DIVWEUO:
18084 case MISC_BUILTIN_DIVDEU:
18085 case MISC_BUILTIN_DIVDEUO:
18086 h.uns_p[0] = 1;
18087 h.uns_p[1] = 1;
18088 h.uns_p[2] = 1;
18089 break;
18091 /* unsigned 3 argument functions. */
18092 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18093 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18094 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18095 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18096 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18097 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18098 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18099 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18100 case VSX_BUILTIN_VPERM_16QI_UNS:
18101 case VSX_BUILTIN_VPERM_8HI_UNS:
18102 case VSX_BUILTIN_VPERM_4SI_UNS:
18103 case VSX_BUILTIN_VPERM_2DI_UNS:
18104 case VSX_BUILTIN_XXSEL_16QI_UNS:
18105 case VSX_BUILTIN_XXSEL_8HI_UNS:
18106 case VSX_BUILTIN_XXSEL_4SI_UNS:
18107 case VSX_BUILTIN_XXSEL_2DI_UNS:
18108 case CRYPTO_BUILTIN_VPERMXOR:
18109 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18110 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18111 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18112 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18113 case CRYPTO_BUILTIN_VSHASIGMAW:
18114 case CRYPTO_BUILTIN_VSHASIGMAD:
18115 case CRYPTO_BUILTIN_VSHASIGMA:
18116 h.uns_p[0] = 1;
18117 h.uns_p[1] = 1;
18118 h.uns_p[2] = 1;
18119 h.uns_p[3] = 1;
18120 break;
18122 /* signed permute functions with unsigned char mask. */
18123 case ALTIVEC_BUILTIN_VPERM_16QI:
18124 case ALTIVEC_BUILTIN_VPERM_8HI:
18125 case ALTIVEC_BUILTIN_VPERM_4SI:
18126 case ALTIVEC_BUILTIN_VPERM_4SF:
18127 case ALTIVEC_BUILTIN_VPERM_2DI:
18128 case ALTIVEC_BUILTIN_VPERM_2DF:
18129 case VSX_BUILTIN_VPERM_16QI:
18130 case VSX_BUILTIN_VPERM_8HI:
18131 case VSX_BUILTIN_VPERM_4SI:
18132 case VSX_BUILTIN_VPERM_4SF:
18133 case VSX_BUILTIN_VPERM_2DI:
18134 case VSX_BUILTIN_VPERM_2DF:
18135 h.uns_p[3] = 1;
18136 break;
18138 /* unsigned args, signed return. */
18139 case VSX_BUILTIN_XVCVUXDDP_UNS:
18140 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18141 h.uns_p[1] = 1;
18142 break;
18144 /* signed args, unsigned return. */
18145 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18146 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18147 case MISC_BUILTIN_UNPACK_TD:
18148 case MISC_BUILTIN_UNPACK_V1TI:
18149 h.uns_p[0] = 1;
18150 break;
18152 /* unsigned arguments for 128-bit pack instructions. */
18153 case MISC_BUILTIN_PACK_TD:
18154 case MISC_BUILTIN_PACK_V1TI:
18155 h.uns_p[1] = 1;
18156 h.uns_p[2] = 1;
18157 break;
18159 default:
18160 break;
18163 /* Figure out how many args are present. */
18164 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18165 num_args--;
18167 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18168 if (!ret_type && h.uns_p[0])
18169 ret_type = builtin_mode_to_type[h.mode[0]][0];
18171 if (!ret_type)
18172 fatal_error (input_location,
18173 "internal error: builtin function %s had an unexpected "
18174 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18176 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18177 arg_type[i] = NULL_TREE;
18179 for (i = 0; i < num_args; i++)
18181 int m = (int) h.mode[i+1];
18182 int uns_p = h.uns_p[i+1];
18184 arg_type[i] = builtin_mode_to_type[m][uns_p];
18185 if (!arg_type[i] && uns_p)
18186 arg_type[i] = builtin_mode_to_type[m][0];
18188 if (!arg_type[i])
18189 fatal_error (input_location,
18190 "internal error: builtin function %s, argument %d "
18191 "had unexpected argument type %s", name, i,
18192 GET_MODE_NAME (m));
18195 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18196 if (*found == NULL)
18198 h2 = ggc_alloc<builtin_hash_struct> ();
18199 *h2 = h;
18200 *found = h2;
18202 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18203 arg_type[2], NULL_TREE);
18206 return (*found)->type;
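/* Usage sketch (illustrative; the builtin codes are placeholders): two
   builtins whose operands have identical modes and identical signedness
   adjustments share one FUNCTION_TYPE node, e.g.

     tree t1 = builtin_function_type (V4SImode, V4SImode, V4SImode,
                                      VOIDmode, CODE_A, "a");
     tree t2 = builtin_function_type (V4SImode, V4SImode, V4SImode,
                                      VOIDmode, CODE_B, "b");

   yields t1 == t2 (assuming neither code is special-cased in the switch
   above), because the second call finds the hash-table entry the first
   call inserted.  */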
18209 static void
18210 rs6000_common_init_builtins (void)
18212 const struct builtin_description *d;
18213 size_t i;
18215 tree opaque_ftype_opaque = NULL_TREE;
18216 tree opaque_ftype_opaque_opaque = NULL_TREE;
18217 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18218 tree v2si_ftype = NULL_TREE;
18219 tree v2si_ftype_qi = NULL_TREE;
18220 tree v2si_ftype_v2si_qi = NULL_TREE;
18221 tree v2si_ftype_int_qi = NULL_TREE;
18222 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18224 if (!TARGET_PAIRED_FLOAT)
18226 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18227 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18230 /* Paired and SPE builtins are only available if you build a compiler with
18231 the appropriate options, so only create those builtins when the
18232 appropriate compiler options are used.  Create Altivec and VSX builtins
18233 on machines with at least the general purpose extensions (970 and newer)
18234 to allow the use of the target attribute. */
18236 if (TARGET_EXTRA_BUILTINS)
18237 builtin_mask |= RS6000_BTM_COMMON;
18239 /* Add the ternary operators. */
18240 d = bdesc_3arg;
18241 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18243 tree type;
18244 HOST_WIDE_INT mask = d->mask;
18246 if ((mask & builtin_mask) != mask)
18248 if (TARGET_DEBUG_BUILTIN)
18249 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18250 continue;
18253 if (rs6000_overloaded_builtin_p (d->code))
18255 if (! (type = opaque_ftype_opaque_opaque_opaque))
18256 type = opaque_ftype_opaque_opaque_opaque
18257 = build_function_type_list (opaque_V4SI_type_node,
18258 opaque_V4SI_type_node,
18259 opaque_V4SI_type_node,
18260 opaque_V4SI_type_node,
18261 NULL_TREE);
18263 else
18265 enum insn_code icode = d->icode;
18266 if (d->name == 0)
18268 if (TARGET_DEBUG_BUILTIN)
18269 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18270 (long unsigned)i);
18272 continue;
18275 if (icode == CODE_FOR_nothing)
18277 if (TARGET_DEBUG_BUILTIN)
18278 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18279 d->name);
18281 continue;
18284 type = builtin_function_type (insn_data[icode].operand[0].mode,
18285 insn_data[icode].operand[1].mode,
18286 insn_data[icode].operand[2].mode,
18287 insn_data[icode].operand[3].mode,
18288 d->code, d->name);
18291 def_builtin (d->name, type, d->code);
18294 /* Add the binary operators. */
18295 d = bdesc_2arg;
18296 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18298 machine_mode mode0, mode1, mode2;
18299 tree type;
18300 HOST_WIDE_INT mask = d->mask;
18302 if ((mask & builtin_mask) != mask)
18304 if (TARGET_DEBUG_BUILTIN)
18305 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18306 continue;
18309 if (rs6000_overloaded_builtin_p (d->code))
18311 if (! (type = opaque_ftype_opaque_opaque))
18312 type = opaque_ftype_opaque_opaque
18313 = build_function_type_list (opaque_V4SI_type_node,
18314 opaque_V4SI_type_node,
18315 opaque_V4SI_type_node,
18316 NULL_TREE);
18318 else
18320 enum insn_code icode = d->icode;
18321 if (d->name == 0)
18323 if (TARGET_DEBUG_BUILTIN)
18324 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18325 (long unsigned)i);
18327 continue;
18330 if (icode == CODE_FOR_nothing)
18332 if (TARGET_DEBUG_BUILTIN)
18333 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18334 d->name);
18336 continue;
18339 mode0 = insn_data[icode].operand[0].mode;
18340 mode1 = insn_data[icode].operand[1].mode;
18341 mode2 = insn_data[icode].operand[2].mode;
18343 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18345 if (! (type = v2si_ftype_v2si_qi))
18346 type = v2si_ftype_v2si_qi
18347 = build_function_type_list (opaque_V2SI_type_node,
18348 opaque_V2SI_type_node,
18349 char_type_node,
18350 NULL_TREE);
18353 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18354 && mode2 == QImode)
18356 if (! (type = v2si_ftype_int_qi))
18357 type = v2si_ftype_int_qi
18358 = build_function_type_list (opaque_V2SI_type_node,
18359 integer_type_node,
18360 char_type_node,
18361 NULL_TREE);
18364 else
18365 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18366 d->code, d->name);
18369 def_builtin (d->name, type, d->code);
18372 /* Add the simple unary operators. */
18373 d = bdesc_1arg;
18374 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18376 machine_mode mode0, mode1;
18377 tree type;
18378 HOST_WIDE_INT mask = d->mask;
18380 if ((mask & builtin_mask) != mask)
18382 if (TARGET_DEBUG_BUILTIN)
18383 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18384 continue;
18387 if (rs6000_overloaded_builtin_p (d->code))
18389 if (! (type = opaque_ftype_opaque))
18390 type = opaque_ftype_opaque
18391 = build_function_type_list (opaque_V4SI_type_node,
18392 opaque_V4SI_type_node,
18393 NULL_TREE);
18395 else
18397 enum insn_code icode = d->icode;
18398 if (d->name == 0)
18400 if (TARGET_DEBUG_BUILTIN)
18401 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18402 (long unsigned)i);
18404 continue;
18407 if (icode == CODE_FOR_nothing)
18409 if (TARGET_DEBUG_BUILTIN)
18410 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18411 d->name);
18413 continue;
18416 mode0 = insn_data[icode].operand[0].mode;
18417 mode1 = insn_data[icode].operand[1].mode;
18419 if (mode0 == V2SImode && mode1 == QImode)
18421 if (! (type = v2si_ftype_qi))
18422 type = v2si_ftype_qi
18423 = build_function_type_list (opaque_V2SI_type_node,
18424 char_type_node,
18425 NULL_TREE);
18428 else
18429 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18430 d->code, d->name);
18433 def_builtin (d->name, type, d->code);
18436 /* Add the simple no-argument operators. */
18437 d = bdesc_0arg;
18438 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18440 machine_mode mode0;
18441 tree type;
18442 HOST_WIDE_INT mask = d->mask;
18444 if ((mask & builtin_mask) != mask)
18446 if (TARGET_DEBUG_BUILTIN)
18447 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18448 continue;
18450 if (rs6000_overloaded_builtin_p (d->code))
18452 if (!opaque_ftype_opaque)
18453 opaque_ftype_opaque
18454 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18455 type = opaque_ftype_opaque;
18457 else
18459 enum insn_code icode = d->icode;
18460 if (d->name == 0)
18462 if (TARGET_DEBUG_BUILTIN)
18463 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18464 (long unsigned) i);
18465 continue;
18467 if (icode == CODE_FOR_nothing)
18469 if (TARGET_DEBUG_BUILTIN)
18470 fprintf (stderr,
18471 "rs6000_builtin, skip no-argument %s (no code)\n",
18472 d->name);
18473 continue;
18475 mode0 = insn_data[icode].operand[0].mode;
18476 if (mode0 == V2SImode)
18478 /* Code for SPE. */
18479 if (! (type = v2si_ftype))
18481 v2si_ftype
18482 = build_function_type_list (opaque_V2SI_type_node,
18483 NULL_TREE);
18484 type = v2si_ftype;
18487 else
18488 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18489 d->code, d->name);
18491 def_builtin (d->name, type, d->code);
18495 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18496 static void
18497 init_float128_ibm (machine_mode mode)
18499 if (!TARGET_XL_COMPAT)
18501 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18502 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18503 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18504 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18506 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18508 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18509 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18510 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18511 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18512 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18513 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18514 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18516 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18517 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18518 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18519 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18520 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18521 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18522 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18523 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18526 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18527 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18529 else
18531 set_optab_libfunc (add_optab, mode, "_xlqadd");
18532 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18533 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18534 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18537 /* Add various conversions for IFmode to use the traditional TFmode
18538 names. */
18539 if (mode == IFmode)
18541 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18542 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18543 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18544 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18545 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18546 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18548 if (TARGET_POWERPC64)
18550 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18551 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18552 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18553 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18558 /* Set up IEEE 128-bit floating point routines. Use different names if the
18559 arguments can be passed in a vector register. The historical PowerPC
18560 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18561 continue to use that if we aren't using vector registers to pass IEEE
18562 128-bit floating point. */
18564 static void
18565 init_float128_ieee (machine_mode mode)
18567 if (FLOAT128_VECTOR_P (mode))
18569 set_optab_libfunc (add_optab, mode, "__addkf3");
18570 set_optab_libfunc (sub_optab, mode, "__subkf3");
18571 set_optab_libfunc (neg_optab, mode, "__negkf2");
18572 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18573 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18574 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18575 set_optab_libfunc (abs_optab, mode, "__abskf2");
18577 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18578 set_optab_libfunc (ne_optab, mode, "__nekf2");
18579 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18580 set_optab_libfunc (ge_optab, mode, "__gekf2");
18581 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18582 set_optab_libfunc (le_optab, mode, "__lekf2");
18583 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18585 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18586 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18587 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18588 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18590 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18591 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18592 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18594 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18595 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18596 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18598 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18599 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18600 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18601 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18602 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18603 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18605 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18606 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18607 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18608 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18610 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18611 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18612 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18613 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18615 if (TARGET_POWERPC64)
18617 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18618 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18619 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18620 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18624 else
18626 set_optab_libfunc (add_optab, mode, "_q_add");
18627 set_optab_libfunc (sub_optab, mode, "_q_sub");
18628 set_optab_libfunc (neg_optab, mode, "_q_neg");
18629 set_optab_libfunc (smul_optab, mode, "_q_mul");
18630 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18631 if (TARGET_PPC_GPOPT)
18632 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18634 set_optab_libfunc (eq_optab, mode, "_q_feq");
18635 set_optab_libfunc (ne_optab, mode, "_q_fne");
18636 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18637 set_optab_libfunc (ge_optab, mode, "_q_fge");
18638 set_optab_libfunc (lt_optab, mode, "_q_flt");
18639 set_optab_libfunc (le_optab, mode, "_q_fle");
18641 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18642 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18643 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18644 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18645 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18646 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18647 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18648 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18652 static void
18653 rs6000_init_libfuncs (void)
18655 /* __float128 support. */
18656 if (TARGET_FLOAT128_TYPE)
18658 init_float128_ibm (IFmode);
18659 init_float128_ieee (KFmode);
18662 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18663 if (TARGET_LONG_DOUBLE_128)
18665 if (!TARGET_IEEEQUAD)
18666 init_float128_ibm (TFmode);
18668 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18669 else
18670 init_float128_ieee (TFmode);
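/* Recap of the resulting names (added for illustration): IBM extended
   double (IFmode, and TFmode when long double is not IEEE) gets the
   __gcc_qadd/__gcc_qsub/... routines (or _xlqadd etc. under
   -mxl-compat), while IEEE 128-bit (KFmode, and TFmode when it is IEEE)
   gets __addkf3/__subkf3/... when passed in vector registers, and the
   historical _q_add/_q_sub/... names otherwise.  */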
18675 /* Expand a block clear operation, and return 1 if successful. Return 0
18676 if we should let the compiler generate normal code.
18678 operands[0] is the destination
18679 operands[1] is the length
18680 operands[3] is the alignment */
18682 int
18683 expand_block_clear (rtx operands[])
18685 rtx orig_dest = operands[0];
18686 rtx bytes_rtx = operands[1];
18687 rtx align_rtx = operands[3];
18688 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18689 HOST_WIDE_INT align;
18690 HOST_WIDE_INT bytes;
18691 int offset;
18692 int clear_bytes;
18693 int clear_step;
18695 /* If this is not a fixed size clear, just call memset. */
18696 if (! constp)
18697 return 0;
18699 /* This must be a fixed size alignment */
18700 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18701 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18703 /* Anything to clear? */
18704 bytes = INTVAL (bytes_rtx);
18705 if (bytes <= 0)
18706 return 1;
18708 /* Use the builtin memset after a point, to avoid huge code bloat.
18709 When optimize_size, avoid any significant code bloat; calling
18710 memset is about 4 instructions, so allow for one instruction to
18711 load zero and three to do clearing. */
18712 if (TARGET_ALTIVEC && align >= 128)
18713 clear_step = 16;
18714 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18715 clear_step = 8;
18716 else if (TARGET_SPE && align >= 64)
18717 clear_step = 8;
18718 else
18719 clear_step = 4;
18721 if (optimize_size && bytes > 3 * clear_step)
18722 return 0;
18723 if (! optimize_size && bytes > 8 * clear_step)
18724 return 0;
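/* Worked example (illustrative): with AltiVec and 128-bit alignment,
   clear_step is 16, so the checks above inline a clear of at most
   3 * 16 = 48 bytes under optimize_size and at most 8 * 16 = 128 bytes
   otherwise; anything larger falls back to a memset call.  */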
18726 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18728 machine_mode mode = BLKmode;
18729 rtx dest;
18731 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18733 clear_bytes = 16;
18734 mode = V4SImode;
18736 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18738 clear_bytes = 8;
18739 mode = V2SImode;
18741 else if (bytes >= 8 && TARGET_POWERPC64
18742 && (align >= 64 || !STRICT_ALIGNMENT))
18744 clear_bytes = 8;
18745 mode = DImode;
18746 if (offset == 0 && align < 64)
18748 rtx addr;
18750 /* If the address form is reg+offset with offset not a
18751 multiple of four, reload into reg indirect form here
18752 rather than waiting for reload. This way we get one
18753 reload, not one per store. */
18754 addr = XEXP (orig_dest, 0);
18755 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18756 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18757 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18759 addr = copy_addr_to_reg (addr);
18760 orig_dest = replace_equiv_address (orig_dest, addr);
18764 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18765 { /* clear 4 bytes */
18766 clear_bytes = 4;
18767 mode = SImode;
18769 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18770 { /* clear 2 bytes */
18771 clear_bytes = 2;
18772 mode = HImode;
18774 else /* clear 1 byte at a time */
18776 clear_bytes = 1;
18777 mode = QImode;
18780 dest = adjust_address (orig_dest, mode, offset);
18782 emit_move_insn (dest, CONST0_RTX (mode));
18785 return 1;
18788 /* Emit a potentially record-form instruction, setting DST from SRC.
18789 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18790 signed comparison of DST with zero. If DOT is 1, the generated RTL
18791 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18792 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18793 a separate COMPARE. */
18795 static void
18796 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18798 if (dot == 0)
18800 emit_move_insn (dst, src);
18801 return;
18804 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18806 emit_move_insn (dst, src);
18807 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18808 return;
18811 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18812 if (dot == 1)
18814 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18815 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18817 else
18819 rtx set = gen_rtx_SET (dst, src);
18820 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
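/* Illustrative RTL (added; not in the original source): for dot == 2
   with CCREG being CR0, the PARALLEL emitted above is roughly

     (parallel [(set (reg:CC CR0) (compare:CC src (const_int 0)))
                (set dst src)])

   which matches the record-form ("dot") insn patterns; for dot == 1 the
   second element is (clobber dst) instead, since the DST value itself
   is not needed.  */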
18825 /* Figure out the correct instructions to generate to load data for
18826 block compare. MODE is used for the read from memory, and
18827 data is zero extended if REG is wider than MODE. If LE code
18828 is being generated, bswap loads are used.
18830 REG is the destination register to move the data into.
18831 MEM is the memory block being read.
18832 MODE is the mode of memory to use for the read. */
18833 static void
18834 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
18836 switch (GET_MODE (reg))
18838 case DImode:
18839 switch (mode)
18841 case QImode:
18842 emit_insn (gen_zero_extendqidi2 (reg, mem));
18843 break;
18844 case HImode:
18846 rtx src = mem;
18847 if (!BYTES_BIG_ENDIAN)
18849 src = gen_reg_rtx (HImode);
18850 emit_insn (gen_bswaphi2 (src, mem));
18852 emit_insn (gen_zero_extendhidi2 (reg, src));
18853 break;
18855 case SImode:
18857 rtx src = mem;
18858 if (!BYTES_BIG_ENDIAN)
18860 src = gen_reg_rtx (SImode);
18861 emit_insn (gen_bswapsi2 (src, mem));
18863 emit_insn (gen_zero_extendsidi2 (reg, src));
18865 break;
18866 case DImode:
18867 if (!BYTES_BIG_ENDIAN)
18868 emit_insn (gen_bswapdi2 (reg, mem));
18869 else
18870 emit_insn (gen_movdi (reg, mem));
18871 break;
18872 default:
18873 gcc_unreachable ();
18875 break;
18877 case SImode:
18878 switch (mode)
18880 case QImode:
18881 emit_insn (gen_zero_extendqisi2 (reg, mem));
18882 break;
18883 case HImode:
18885 rtx src = mem;
18886 if (!BYTES_BIG_ENDIAN)
18888 src = gen_reg_rtx (HImode);
18889 emit_insn (gen_bswaphi2 (src, mem));
18891 emit_insn (gen_zero_extendhisi2 (reg, src));
18892 break;
18894 case SImode:
18895 if (!BYTES_BIG_ENDIAN)
18896 emit_insn (gen_bswapsi2 (reg, mem));
18897 else
18898 emit_insn (gen_movsi (reg, mem));
18899 break;
18900 case DImode:
18901 /* DImode is larger than the destination reg so is not expected. */
18902 gcc_unreachable ();
18903 break;
18904 default:
18905 gcc_unreachable ();
18907 break;
18908 default:
18909 gcc_unreachable ();
18910 break;
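/* Example (illustrative): on little-endian with a DImode REG and an
   HImode MEM, the code above emits a byte-reversed halfword load
   (gen_bswaphi2, i.e. lhbrx) into a fresh HImode register and then
   zero-extends it, so REG ends up holding the bytes in memory order --
   which is what the subtract-based comparison in the caller relies
   on.  */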
18914 /* Select the mode to be used for reading the next chunk of bytes
18915 in the compare.
18917 OFFSET is the current read offset from the beginning of the block.
18918 BYTES is the number of bytes remaining to be read.
18919 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
18920 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
18921 the largest allowable mode. */
18922 static machine_mode
18923 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
18924 HOST_WIDE_INT align, bool word_mode_ok)
18926 /* First see if we can do a whole load unit
18927 as that will be more efficient than a larger load + shift. */
18929 /* If big, use biggest chunk.
18930 If exactly chunk size, use that size.
18931 If remainder can be done in one piece with shifting, do that.
18932 Do largest chunk possible without violating alignment rules. */
18934 /* The most we can read without potential page crossing. */
18935 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
18937 if (word_mode_ok && bytes >= UNITS_PER_WORD)
18938 return word_mode;
18939 else if (bytes == GET_MODE_SIZE (SImode))
18940 return SImode;
18941 else if (bytes == GET_MODE_SIZE (HImode))
18942 return HImode;
18943 else if (bytes == GET_MODE_SIZE (QImode))
18944 return QImode;
18945 else if (bytes < GET_MODE_SIZE (SImode)
18946 && offset >= GET_MODE_SIZE (SImode) - bytes)
18947 /* This matches the case where we have SImode and 3 bytes
18948 and offset >= 1 and permits us to move back one and overlap
18949 with the previous read, thus avoiding having to shift
18950 unwanted bytes off of the input. */
18951 return SImode;
18952 else if (word_mode_ok && bytes < UNITS_PER_WORD
18953 && offset >= UNITS_PER_WORD-bytes)
18954 /* Similarly, if we can use DImode it will get matched here and
18955 can do an overlapping read that ends at the end of the block. */
18956 return word_mode;
18957 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
18958 /* It is safe to do all remaining in one load of largest size,
18959 possibly with a shift to get rid of unwanted bytes. */
18960 return word_mode;
18961 else if (maxread >= GET_MODE_SIZE (SImode))
18962 /* It is safe to do all remaining in one SImode load,
18963 possibly with a shift to get rid of unwanted bytes. */
18964 return SImode;
18965 else if (bytes > GET_MODE_SIZE (SImode))
18966 return SImode;
18967 else if (bytes > GET_MODE_SIZE (HImode))
18968 return HImode;
18970 /* The final fallback is to do one byte at a time. */
18971 return QImode;
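/* Worked example (illustrative): with bytes == 3 and offset == 2, the
   "bytes < GET_MODE_SIZE (SImode) && offset >= GET_MODE_SIZE (SImode)
   - bytes" case fires (3 < 4 and 2 >= 1), so SImode is returned and the
   caller backs the load up by one byte, re-reading one already-compared
   byte rather than shifting unwanted bytes off.  */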
18974 /* Compute the alignment of pointer+OFFSET where the original alignment
18975 of pointer was BASE_ALIGN. */
18976 static HOST_WIDE_INT
18977 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
18979 if (offset == 0)
18980 return base_align;
18981 return min (base_align, offset & -offset);
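/* Worked example (illustrative): offset & -offset isolates the lowest
   set bit of OFFSET, i.e. the largest power of two dividing it, so with
   base_align == 8 and offset == 4 the result is min (8, 4) = 4: after
   advancing 4 bytes, only 4-byte alignment can still be guaranteed.  */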
18984 /* Expand a block compare operation, and return true if successful.
18985 Return false if we should let the compiler generate normal code,
18986 probably a memcmp call.
18988 OPERANDS[0] is the target (result).
18989 OPERANDS[1] is the first source.
18990 OPERANDS[2] is the second source.
18991 OPERANDS[3] is the length.
18992 OPERANDS[4] is the alignment. */
18993 bool
18994 expand_block_compare (rtx operands[])
18996 rtx target = operands[0];
18997 rtx orig_src1 = operands[1];
18998 rtx orig_src2 = operands[2];
18999 rtx bytes_rtx = operands[3];
19000 rtx align_rtx = operands[4];
19001 HOST_WIDE_INT cmp_bytes = 0;
19002 rtx src1 = orig_src1;
19003 rtx src2 = orig_src2;
19005 /* If this is not a fixed size compare, just call memcmp */
19006 if (!CONST_INT_P (bytes_rtx))
19007 return false;
19009 /* This must be a fixed size alignment */
19010 if (!CONST_INT_P (align_rtx))
19011 return false;
19013 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
19015 /* If unaligned accesses are slow (SLOW_UNALIGNED_ACCESS), don't inline. */
19016 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
19017 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
19018 return false;
19020 gcc_assert (GET_MODE (target) == SImode);
19022 /* Anything to compare? */
19023 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
19024 if (bytes <= 0)
19025 return true;
19027 /* The code generated for P7 and older is not faster than glibc
19028 memcmp if alignment is small and length is not short, so bail
19029 out to avoid those conditions. */
19030 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
19031 && ((base_align == 1 && bytes > 16)
19032 || (base_align == 2 && bytes > 32)))
19033 return false;
19035 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
19036 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
19038 /* If we have an LE target without ldbrx and word_mode is DImode,
19039 then we must avoid using word_mode. */
19040 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
19041 && word_mode == DImode);
19043 /* Strategy phase. How many ops will this take and should we expand it? */
19045 int offset = 0;
19046 machine_mode load_mode =
19047 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
19048 int load_mode_size = GET_MODE_SIZE (load_mode);
19050 /* We don't want to generate too much code. */
19051 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
19052 > rs6000_block_compare_inline_limit)
19053 return false;
19055 bool generate_6432_conversion = false;
19056 rtx convert_label = NULL;
19057 rtx final_label = NULL;
19059 /* Example of generated code for an 11-byte compare with 1-byte alignment:
19060 .L10:
19061 ldbrx 10,6,9
19062 ldbrx 9,7,9
19063 subf. 9,9,10
19064 bne 0,.L8
19065 addi 9,4,7
19066 lwbrx 10,0,9
19067 addi 9,5,7
19068 lwbrx 9,0,9
19069 subf 9,9,10
19070 b .L9
19071 .L8: # convert_label
19072 cntlzd 9,9
19073 addi 9,9,-1
19074 xori 9,9,0x3f
19075 .L9: # final_label
19077 If we start off with DImode and have a compare/branch to something
19078 with a smaller mode, then we need a block with the DI->SI conversion
19079 that may or may not be executed. */
19081 while (bytes > 0)
19083 int align = compute_current_alignment (base_align, offset);
19084 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19085 load_mode = select_block_compare_mode (offset, bytes, align,
19086 word_mode_ok);
19087 else
19088 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19089 load_mode_size = GET_MODE_SIZE (load_mode);
19090 if (bytes >= load_mode_size)
19091 cmp_bytes = load_mode_size;
19092 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19094 /* Move this load back so it doesn't go past the end.
19095 P8/P9 can do this efficiently. */
19096 int extra_bytes = load_mode_size - bytes;
19097 cmp_bytes = bytes;
19098 if (extra_bytes < offset)
19100 offset -= extra_bytes;
19101 cmp_bytes = load_mode_size;
19102 bytes = cmp_bytes;
19105 else
19106 /* P7 and earlier can't do the overlapping load trick fast,
19107 so this forces a non-overlapping load and a shift to get
19108 rid of the extra bytes. */
19109 cmp_bytes = bytes;
19111 src1 = adjust_address (orig_src1, load_mode, offset);
19112 src2 = adjust_address (orig_src2, load_mode, offset);
19114 if (!REG_P (XEXP (src1, 0)))
19116 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19117 src1 = replace_equiv_address (src1, src1_reg);
19119 set_mem_size (src1, cmp_bytes);
19121 if (!REG_P (XEXP (src2, 0)))
19123 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19124 src2 = replace_equiv_address (src2, src2_reg);
19126 set_mem_size (src2, cmp_bytes);
19128 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19129 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19131 if (cmp_bytes < load_mode_size)
19133 /* Shift unneeded bytes off. */
19134 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
19135 if (word_mode == DImode)
19137 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
19138 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
19140 else
19142 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19143 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
19147 /* We previously did a block that needed 64->32 conversion but
19148 the current block does not, so a label is needed to jump
19149 to the end. */
19150 if (generate_6432_conversion && !final_label
19151 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
19152 final_label = gen_label_rtx ();
19154 /* Do we need a 64->32 conversion block? */
19155 int remain = bytes - cmp_bytes;
19156 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
19158 generate_6432_conversion = true;
19159 if (remain > 0 && !convert_label)
19160 convert_label = gen_label_rtx ();
19163 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
19165 /* Target is larger than load size so we don't need to
19166 reduce result size. */
19167 if (remain > 0)
19169 /* This is not the last block, branch to the end if the result
19170 of this subtract is not zero. */
19171 if (!final_label)
19172 final_label = gen_label_rtx ();
19173 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19174 rtx cond = gen_reg_rtx (CCmode);
19175 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19176 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19177 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19178 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19179 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19180 fin_ref, pc_rtx);
19181 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19182 JUMP_LABEL (j) = final_label;
19183 LABEL_NUSES (final_label) += 1;
19185 else
19187 if (word_mode == DImode)
19189 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19190 tmp_reg_src2));
19191 emit_insn (gen_movsi (target,
19192 gen_lowpart (SImode, tmp_reg_src2)));
19194 else
19195 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
19197 if (final_label)
19199 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19200 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19201 JUMP_LABEL (j) = final_label;
19202 LABEL_NUSES (final_label) += 1;
19203 emit_barrier ();
19207 else
19209 generate_6432_conversion = true;
19210 if (remain > 0)
19212 if (!convert_label)
19213 convert_label = gen_label_rtx ();
19215 /* Compare to zero and branch to convert_label if not zero. */
19216 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
19217 rtx cond = gen_reg_rtx (CCmode);
19218 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
19219 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19220 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19221 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19222 cvt_ref, pc_rtx);
19223 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19224 JUMP_LABEL (j) = convert_label;
19225 LABEL_NUSES (convert_label) += 1;
19227 else
19229 /* Just do the subtract. Since this is the last block the
19230 convert code will be generated immediately following. */
19231 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19232 tmp_reg_src2));
19236 offset += cmp_bytes;
19237 bytes -= cmp_bytes;
19240 if (generate_6432_conversion)
19242 if (convert_label)
19243 emit_label (convert_label);
19245 /* We need to produce a DI result from the sub, then convert it to the
19246 SI target while maintaining the <0 / ==0 / >0 properties.
19247 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
19248 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
19249 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
19250 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
19251 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
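/* Sanity check of the sequence (illustrative): for a zero difference,
   cntlzd gives 64, 64 - 1 = 63, and 63 ^ 63 = 0; for a positive
   difference cntlzd is in [1,63], so the result lands in [1,63]; for a
   negative difference cntlzd is 0, 0 - 1 = -1, and -1 ^ 63 =
   0x...ffffffc0, which stays negative after truncation to SImode.  So
   <0 / ==0 / >0 is preserved without a branch.  */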
19254 if (final_label)
19255 emit_label (final_label);
19257 gcc_assert (bytes == 0);
19258 return true;
19262 /* Expand a block move operation, and return 1 if successful. Return 0
19263 if we should let the compiler generate normal code.
19265 operands[0] is the destination
19266 operands[1] is the source
19267 operands[2] is the length
19268 operands[3] is the alignment */
19270 #define MAX_MOVE_REG 4
19272 int
19273 expand_block_move (rtx operands[])
19275 rtx orig_dest = operands[0];
19276 rtx orig_src = operands[1];
19277 rtx bytes_rtx = operands[2];
19278 rtx align_rtx = operands[3];
19279 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
19280 int align;
19281 int bytes;
19282 int offset;
19283 int move_bytes;
19284 rtx stores[MAX_MOVE_REG];
19285 int num_reg = 0;
19287 /* If this is not a fixed size move, just call memcpy */
19288 if (! constp)
19289 return 0;
19291 /* This must be a fixed size alignment */
19292 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19293 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19295 /* Anything to move? */
19296 bytes = INTVAL (bytes_rtx);
19297 if (bytes <= 0)
19298 return 1;
19300 if (bytes > rs6000_block_move_inline_limit)
19301 return 0;
19303 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
19305 union {
19306 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
19307 rtx (*mov) (rtx, rtx);
19308 } gen_func;
19309 machine_mode mode = BLKmode;
19310 rtx src, dest;
19312 /* Altivec first, since it will be faster than a string move
19313 when it applies, and usually not significantly larger. */
19314 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
19316 move_bytes = 16;
19317 mode = V4SImode;
19318 gen_func.mov = gen_movv4si;
19320 else if (TARGET_SPE && bytes >= 8 && align >= 64)
19322 move_bytes = 8;
19323 mode = V2SImode;
19324 gen_func.mov = gen_movv2si;
19326 else if (TARGET_STRING
19327 && bytes > 24 /* move up to 32 bytes at a time */
19328 && ! fixed_regs[5]
19329 && ! fixed_regs[6]
19330 && ! fixed_regs[7]
19331 && ! fixed_regs[8]
19332 && ! fixed_regs[9]
19333 && ! fixed_regs[10]
19334 && ! fixed_regs[11]
19335 && ! fixed_regs[12])
19337 move_bytes = (bytes > 32) ? 32 : bytes;
19338 gen_func.movmemsi = gen_movmemsi_8reg;
19340 else if (TARGET_STRING
19341 && bytes > 16 /* move up to 24 bytes at a time */
19342 && ! fixed_regs[5]
19343 && ! fixed_regs[6]
19344 && ! fixed_regs[7]
19345 && ! fixed_regs[8]
19346 && ! fixed_regs[9]
19347 && ! fixed_regs[10])
19349 move_bytes = (bytes > 24) ? 24 : bytes;
19350 gen_func.movmemsi = gen_movmemsi_6reg;
19352 else if (TARGET_STRING
19353 && bytes > 8 /* move up to 16 bytes at a time */
19354 && ! fixed_regs[5]
19355 && ! fixed_regs[6]
19356 && ! fixed_regs[7]
19357 && ! fixed_regs[8])
19359 move_bytes = (bytes > 16) ? 16 : bytes;
19360 gen_func.movmemsi = gen_movmemsi_4reg;
19362 else if (bytes >= 8 && TARGET_POWERPC64
19363 && (align >= 64 || !STRICT_ALIGNMENT))
19365 move_bytes = 8;
19366 mode = DImode;
19367 gen_func.mov = gen_movdi;
19368 if (offset == 0 && align < 64)
19370 rtx addr;
19372 /* If the address form is reg+offset with offset not a
19373 multiple of four, reload into reg indirect form here
19374 rather than waiting for reload. This way we get one
19375 reload, not one per load and/or store. */
19376 addr = XEXP (orig_dest, 0);
19377 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19378 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19379 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19381 addr = copy_addr_to_reg (addr);
19382 orig_dest = replace_equiv_address (orig_dest, addr);
19384 addr = XEXP (orig_src, 0);
19385 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19386 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19387 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19389 addr = copy_addr_to_reg (addr);
19390 orig_src = replace_equiv_address (orig_src, addr);
19394 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
19395 { /* move up to 8 bytes at a time */
19396 move_bytes = (bytes > 8) ? 8 : bytes;
19397 gen_func.movmemsi = gen_movmemsi_2reg;
19399 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19400 { /* move 4 bytes */
19401 move_bytes = 4;
19402 mode = SImode;
19403 gen_func.mov = gen_movsi;
19405 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19406 { /* move 2 bytes */
19407 move_bytes = 2;
19408 mode = HImode;
19409 gen_func.mov = gen_movhi;
19411 else if (TARGET_STRING && bytes > 1)
19412 { /* move up to 4 bytes at a time */
19413 move_bytes = (bytes > 4) ? 4 : bytes;
19414 gen_func.movmemsi = gen_movmemsi_1reg;
19416 else /* move 1 byte at a time */
19418 move_bytes = 1;
19419 mode = QImode;
19420 gen_func.mov = gen_movqi;
19423 src = adjust_address (orig_src, mode, offset);
19424 dest = adjust_address (orig_dest, mode, offset);
19426 if (mode != BLKmode)
19428 rtx tmp_reg = gen_reg_rtx (mode);
19430 emit_insn ((*gen_func.mov) (tmp_reg, src));
19431 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
19434 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
19436 int i;
19437 for (i = 0; i < num_reg; i++)
19438 emit_insn (stores[i]);
19439 num_reg = 0;
19442 if (mode == BLKmode)
19444 /* Move the address into scratch registers. The movmemsi
19445 patterns require zero offset. */
19446 if (!REG_P (XEXP (src, 0)))
19448 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
19449 src = replace_equiv_address (src, src_reg);
19451 set_mem_size (src, move_bytes);
19453 if (!REG_P (XEXP (dest, 0)))
19455 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
19456 dest = replace_equiv_address (dest, dest_reg);
19458 set_mem_size (dest, move_bytes);
19460 emit_insn ((*gen_func.movmemsi) (dest, src,
19461 GEN_INT (move_bytes & 31),
19462 align_rtx));
19466 return 1;
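/* Strategy example (illustrative): a 30-byte move with TARGET_STRING
   and r5..r12 available takes the movmemsi_8reg path with
   move_bytes == 30.  Note the GEN_INT (move_bytes & 31) above: a full
   32-byte chunk is encoded as 0, matching the lswi/stswi convention
   where a count field of 0 means 32 bytes.  */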
19470 /* Return a string to perform a load_multiple operation.
19471 operands[0] is the vector.
19472 operands[1] is the source address.
19473 operands[2] is the first destination register. */
19475 const char *
19476 rs6000_output_load_multiple (rtx operands[3])
19478 /* We have to handle the case where the pseudo used to contain the address
19479 is assigned to one of the output registers. */
19480 int i, j;
19481 int words = XVECLEN (operands[0], 0);
19482 rtx xop[10];
19484 if (XVECLEN (operands[0], 0) == 1)
19485 return "lwz %2,0(%1)";
19487 for (i = 0; i < words; i++)
19488 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
19490 if (i == words-1)
19492 xop[0] = GEN_INT (4 * (words-1));
19493 xop[1] = operands[1];
19494 xop[2] = operands[2];
19495 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
19496 return "";
19498 else if (i == 0)
19500 xop[0] = GEN_INT (4 * (words-1));
19501 xop[1] = operands[1];
19502 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
19503 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
19504 return "";
19506 else
19508 for (j = 0; j < words; j++)
19509 if (j != i)
19511 xop[0] = GEN_INT (j * 4);
19512 xop[1] = operands[1];
19513 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
19514 output_asm_insn ("lwz %2,%0(%1)", xop);
19516 xop[0] = GEN_INT (i * 4);
19517 xop[1] = operands[1];
19518 output_asm_insn ("lwz %1,%0(%1)", xop);
19519 return "";
19523 return "lswi %2,%1,%N0";
19527 /* A validation routine: say whether CODE, a condition code, and MODE
19528 match. The other alternatives either don't make sense or should
19529 never be generated. */
19531 void
19532 validate_condition_mode (enum rtx_code code, machine_mode mode)
19534 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
19535 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
19536 && GET_MODE_CLASS (mode) == MODE_CC);
19538 /* These don't make sense. */
19539 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
19540 || mode != CCUNSmode);
19542 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
19543 || mode == CCUNSmode);
19545 gcc_assert (mode == CCFPmode
19546 || (code != ORDERED && code != UNORDERED
19547 && code != UNEQ && code != LTGT
19548 && code != UNGT && code != UNLT
19549 && code != UNGE && code != UNLE));
19551 /* These should never be generated except for
19552 flag_finite_math_only. */
19553 gcc_assert (mode != CCFPmode
19554 || flag_finite_math_only
19555 || (code != LE && code != GE
19556 && code != UNEQ && code != LTGT
19557 && code != UNGT && code != UNLT));
19559 /* These are invalid; the information is not there. */
19560 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
19564 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
19565 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
19566 non-null, store there the bit offset (counted from the right) where
19567 the single stretch of 1 bits begins; and similarly for B, the bit
19568 offset where it ends. */
19570 bool
19571 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
19573 unsigned HOST_WIDE_INT val = INTVAL (mask);
19574 unsigned HOST_WIDE_INT bit;
19575 int nb, ne;
19576 int n = GET_MODE_PRECISION (mode);
19578 if (mode != DImode && mode != SImode)
19579 return false;
19581 if (INTVAL (mask) >= 0)
19583 bit = val & -val;
19584 ne = exact_log2 (bit);
19585 nb = exact_log2 (val + bit);
19587 else if (val + 1 == 0)
19589 nb = n;
19590 ne = 0;
19592 else if (val & 1)
19594 val = ~val;
19595 bit = val & -val;
19596 nb = exact_log2 (bit);
19597 ne = exact_log2 (val + bit);
19599 else
19601 bit = val & -val;
19602 ne = exact_log2 (bit);
19603 if (val + bit == 0)
19604 nb = n;
19605 else
19606 nb = 0;
19609 nb--;
19611 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
19612 return false;
19614 if (b)
19615 *b = nb;
19616 if (e)
19617 *e = ne;
19619 return true;
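/* Worked examples (added commentary, hand-computed from the logic
   above): for MASK = 0x00ffff00 in SImode the ones run from bit 8
   through bit 23, so *E = 8 and *B = 23.  A wrap-around mask such as
   0xff0000ff is also accepted: the (val & 1) branch complements the
   value and yields *E = 24, *B = 7, i.e. the stretch begins at bit 24,
   wraps past bit 31, and ends at bit 7 (*E > *B flags the wrap).  */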
19622 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
19623 or rldicr instruction, to implement an AND with it in mode MODE. */
19625 bool
19626 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
19628 int nb, ne;
19630 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19631 return false;
19633 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
19634 does not wrap. */
19635 if (mode == DImode)
19636 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
19638 /* For SImode, rlwinm can do everything. */
19639 if (mode == SImode)
19640 return (nb < 32 && ne < 32);
19642 return false;
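/* Added examples: in DImode, 0xff (ne == 0) can be done with rldicl,
   0xffffffff00000000 (nb == 63) with rldicr, and 0x00000000ffffff00
   (nb = 31, ne = 8, no wrap) with rlwinm.  A mask like
   0x0000ffff00000000 is a valid single stretch but satisfies none of
   the three conditions, so it is rejected here.  */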
19645 /* Return the instruction template for an AND with mask in mode MODE, with
19646 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19648 const char *
19649 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
19651 int nb, ne;
19653 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
19654 gcc_unreachable ();
19656 if (mode == DImode && ne == 0)
19658 operands[3] = GEN_INT (63 - nb);
19659 if (dot)
19660 return "rldicl. %0,%1,0,%3";
19661 return "rldicl %0,%1,0,%3";
19664 if (mode == DImode && nb == 63)
19666 operands[3] = GEN_INT (63 - ne);
19667 if (dot)
19668 return "rldicr. %0,%1,0,%3";
19669 return "rldicr %0,%1,0,%3";
19672 if (nb < 32 && ne < 32)
19674 operands[3] = GEN_INT (31 - nb);
19675 operands[4] = GEN_INT (31 - ne);
19676 if (dot)
19677 return "rlwinm. %0,%1,0,%3,%4";
19678 return "rlwinm %0,%1,0,%3,%4";
19681 gcc_unreachable ();
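/* Added example: for operands[2] == 0xff0 in SImode we get nb = 11,
   ne = 4, so operands[3] = 20 and operands[4] = 27, and the template
   prints "rlwinm %0,%1,0,20,27", keeping exactly bits 4..11.  */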
19684 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
19685 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
19686 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
19688 bool
19689 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
19691 int nb, ne;
19693 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19694 return false;
19696 int n = GET_MODE_PRECISION (mode);
19697 int sh = -1;
19699 if (CONST_INT_P (XEXP (shift, 1)))
19701 sh = INTVAL (XEXP (shift, 1));
19702 if (sh < 0 || sh >= n)
19703 return false;
19706 rtx_code code = GET_CODE (shift);
19708 /* Convert any shift by 0 to a rotate, to simplify below code. */
19709 if (sh == 0)
19710 code = ROTATE;
19712 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19713 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19714 code = ASHIFT;
19715 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19717 code = LSHIFTRT;
19718 sh = n - sh;
19721 /* DImode rotates need rld*. */
19722 if (mode == DImode && code == ROTATE)
19723 return (nb == 63 || ne == 0 || ne == sh);
19725 /* SImode rotates need rlw*. */
19726 if (mode == SImode && code == ROTATE)
19727 return (nb < 32 && ne < 32 && sh < 32);
19729 /* Wrap-around masks are only okay for rotates. */
19730 if (ne > nb)
19731 return false;
19733 /* Variable shifts are only okay for rotates. */
19734 if (sh < 0)
19735 return false;
19737 /* Don't allow ASHIFT if the mask is wrong for that. */
19738 if (code == ASHIFT && ne < sh)
19739 return false;
19741 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
19742 if the mask is wrong for that. */
19743 if (nb < 32 && ne < 32 && sh < 32
19744 && !(code == LSHIFTRT && nb >= 32 - sh))
19745 return true;
19747 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
19748 if the mask is wrong for that. */
19749 if (code == LSHIFTRT)
19750 sh = 64 - sh;
19751 if (nb == 63 || ne == 0 || ne == sh)
19752 return !(code == LSHIFTRT && nb >= sh);
19754 return false;
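/* Added example: (and (ashift X 8) 0xffffff00) in SImode gives
   nb = 31, ne = 8, sh = 8; ASHIFT is allowed since ne >= sh, and the
   rlw* test (nb < 32 && ne < 32 && sh < 32) accepts it.  A rotate by 8
   under mask 0x0000ff00 (nb = 15, ne = 8) is first rewritten to an
   ASHIFT by the conversion above, because only bits rotated in from
   the low byte survive that mask.  */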
19757 /* Return the instruction template for a shift with mask in mode MODE, with
19758 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19760 const char *
19761 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
19763 int nb, ne;
19765 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19766 gcc_unreachable ();
19768 if (mode == DImode && ne == 0)
19770 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19771 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19772 operands[3] = GEN_INT (63 - nb);
19773 if (dot)
19774 return "rld%I2cl. %0,%1,%2,%3";
19775 return "rld%I2cl %0,%1,%2,%3";
19778 if (mode == DImode && nb == 63)
19780 operands[3] = GEN_INT (63 - ne);
19781 if (dot)
19782 return "rld%I2cr. %0,%1,%2,%3";
19783 return "rld%I2cr %0,%1,%2,%3";
19786 if (mode == DImode
19787 && GET_CODE (operands[4]) != LSHIFTRT
19788 && CONST_INT_P (operands[2])
19789 && ne == INTVAL (operands[2]))
19791 operands[3] = GEN_INT (63 - nb);
19792 if (dot)
19793 return "rld%I2c. %0,%1,%2,%3";
19794 return "rld%I2c %0,%1,%2,%3";
19797 if (nb < 32 && ne < 32)
19799 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19800 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19801 operands[3] = GEN_INT (31 - nb);
19802 operands[4] = GEN_INT (31 - ne);
19803 /* This insn can also be a 64-bit rotate with mask that really makes
19804 it just a shift right (with mask); the %h below are to adjust for
19805 that situation (shift count is >= 32 in that case). */
19806 if (dot)
19807 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19808 return "rlw%I2nm %0,%1,%h2,%3,%4";
19811 gcc_unreachable ();
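/* Added examples: in DImode, a left shift by 16 under mask
   0x00000000ffff0000 has ne == 16 == the shift count, selects the
   rld%I2c arm, and prints "rldic %0,%1,16,32"; a right shift by 8
   under mask 0x00ffffffffffffff (ne == 0) rewrites the count to
   64 - 8 = 56 and prints "rldicl %0,%1,56,8", the canonical encoding
   of srdi.  */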
19814 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19815 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19816 ASHIFT, or LSHIFTRT) in mode MODE. */
19818 bool
19819 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19821 int nb, ne;
19823 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19824 return false;
19826 int n = GET_MODE_PRECISION (mode);
19828 int sh = INTVAL (XEXP (shift, 1));
19829 if (sh < 0 || sh >= n)
19830 return false;
19832 rtx_code code = GET_CODE (shift);
19834 /* Convert any shift by 0 to a rotate, to simplify below code. */
19835 if (sh == 0)
19836 code = ROTATE;
19838 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19839 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19840 code = ASHIFT;
19841 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19843 code = LSHIFTRT;
19844 sh = n - sh;
19847 /* DImode rotates need rldimi. */
19848 if (mode == DImode && code == ROTATE)
19849 return (ne == sh);
19851 /* SImode rotates need rlwimi. */
19852 if (mode == SImode && code == ROTATE)
19853 return (nb < 32 && ne < 32 && sh < 32);
19855 /* Wrap-around masks are only okay for rotates. */
19856 if (ne > nb)
19857 return false;
19859 /* Don't allow ASHIFT if the mask is wrong for that. */
19860 if (code == ASHIFT && ne < sh)
19861 return false;
19863 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19864 if the mask is wrong for that. */
19865 if (nb < 32 && ne < 32 && sh < 32
19866 && !(code == LSHIFTRT && nb >= 32 - sh))
19867 return true;
19869 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19870 if the mask is wrong for that. */
19871 if (code == LSHIFTRT)
19872 sh = 64 - sh;
19873 if (ne == sh)
19874 return !(code == LSHIFTRT && nb >= sh);
19876 return false;
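/* Added example: inserting the low 16 bits of one register into the
   top half of another, (and (ashift X 16) 0xffff0000) in SImode, gives
   nb = 31, ne = 16, sh = 16; ne is not below sh and nb, ne, sh all fit
   the rlwimi limits, so the combination is accepted.  */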
19879 /* Return the instruction template for an insert with mask in mode MODE, with
19880 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19882 const char *
19883 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19885 int nb, ne;
19887 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19888 gcc_unreachable ();
19890 /* Prefer rldimi because rlwimi is cracked. */
19891 if (TARGET_POWERPC64
19892 && (!dot || mode == DImode)
19893 && GET_CODE (operands[4]) != LSHIFTRT
19894 && ne == INTVAL (operands[2]))
19896 operands[3] = GEN_INT (63 - nb);
19897 if (dot)
19898 return "rldimi. %0,%1,%2,%3";
19899 return "rldimi %0,%1,%2,%3";
19902 if (nb < 32 && ne < 32)
19904 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19905 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19906 operands[3] = GEN_INT (31 - nb);
19907 operands[4] = GEN_INT (31 - ne);
19908 if (dot)
19909 return "rlwimi. %0,%1,%2,%3,%4";
19910 return "rlwimi %0,%1,%2,%3,%4";
19913 gcc_unreachable ();
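/* Added example: for the insert described above (shift 16, mask
   0xffff0000) a 64-bit target prefers "rldimi %0,%1,16,32"
   (operands[3] = 63 - 31); otherwise the rlwimi arm computes
   operands[3] = 0 and operands[4] = 15 and prints
   "rlwimi %0,%1,16,0,15".  */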
19916 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19917 using two machine instructions. */
19919 bool
19920 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19922 /* There are two kinds of AND we can handle with two insns:
19923 1) those we can do with two rl* insns;
19924 2) ori[s];xori[s].
19926 We do not handle that last case yet. */
19928 /* If there is just one stretch of ones, we can do it. */
19929 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19930 return true;
19932 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19933 one insn, we can do the whole thing with two. */
19934 unsigned HOST_WIDE_INT val = INTVAL (c);
19935 unsigned HOST_WIDE_INT bit1 = val & -val;
19936 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19937 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19938 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19939 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
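/* Worked example (added commentary): for C = 0xff0f the lowest hole
   is bits 4..7, so bit1 = 0x1, bit2 = 0x10, bit3 = 0x100, and the
   filled value val + bit3 - bit2 = 0xffff is a single stretch of ones;
   that is a one-insn AND mask, so the whole AND takes two insns.  */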
19942 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19943 If EXPAND is true, split rotate-and-mask instructions we generate to
19944 their constituent parts as well (this is used during expand); if DOT
19945 is 1, make the last insn a record-form instruction clobbering the
19946 destination GPR and setting the CC reg (from operands[3]); if 2, set
19947 that GPR as well as the CC reg. */
19949 void
19950 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19952 gcc_assert (!(expand && dot));
19954 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19956 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19957 shift right. This generates better code than doing the masks without
19958 shifts, or shifting first right and then left. */
19959 int nb, ne;
19960 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19962 gcc_assert (mode == DImode);
19964 int shift = 63 - nb;
19965 if (expand)
19967 rtx tmp1 = gen_reg_rtx (DImode);
19968 rtx tmp2 = gen_reg_rtx (DImode);
19969 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19970 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19971 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19973 else
19975 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19976 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19977 emit_move_insn (operands[0], tmp);
19978 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19979 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19981 return;
19984 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19985 that does the rest. */
19986 unsigned HOST_WIDE_INT bit1 = val & -val;
19987 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19988 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19989 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19991 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19992 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19994 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19996 /* Two "no-rotate"-and-mask instructions, for SImode. */
19997 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19999 gcc_assert (mode == SImode);
20001 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
20002 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
20003 emit_move_insn (reg, tmp);
20004 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
20005 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20006 return;
20009 gcc_assert (mode == DImode);
20011 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
20012 insns; we have to do the first in SImode, because it wraps. */
20013 if (mask2 <= 0xffffffff
20014 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
20016 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
20017 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
20018 GEN_INT (mask1));
20019 rtx reg_low = gen_lowpart (SImode, reg);
20020 emit_move_insn (reg_low, tmp);
20021 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
20022 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20023 return;
20026 /* Two rld* insns: rotate, clear the hole in the middle (which now is
20027 at the top end), rotate back and clear the other hole. */
20028 int right = exact_log2 (bit3);
20029 int left = 64 - right;
20031 /* Rotate the mask too. */
20032 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
20034 if (expand)
20036 rtx tmp1 = gen_reg_rtx (DImode);
20037 rtx tmp2 = gen_reg_rtx (DImode);
20038 rtx tmp3 = gen_reg_rtx (DImode);
20039 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
20040 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
20041 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
20042 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
20044 else
20046 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
20047 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
20048 emit_move_insn (operands[0], tmp);
20049 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
20050 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
20051 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
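/* Worked example for this last path (added commentary): for
   val = 0xff00000000000ff0, bit2 = 0x1000 and bit3 = 1ULL << 56, so
   mask2 = 0xfffffffffffffff0 (a valid rldicr mask), right = 56,
   left = 8, and the rotated mask1 is 0xfffff.  The emitted sequence
   rotates left 8, ANDs with 0xfffff (clearing the hole, now at bits
   20..63), rotates left 56 to undo, and ANDs with mask2 to clear
   bits 0..3.  */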
20055 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
20056 for lfq and stfq insns iff the registers are hard registers. */
20058 int
20059 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
20061 /* We might have been passed a SUBREG. */
20062 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
20063 return 0;
20065 /* We might have been passed non floating point registers. */
20066 if (!FP_REGNO_P (REGNO (reg1))
20067 || !FP_REGNO_P (REGNO (reg2)))
20068 return 0;
20070 return (REGNO (reg1) == REGNO (reg2) - 1);
20073 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
20074 addr1 and addr2 must be in consecutive memory locations
20075 (addr2 == addr1 + 8). */
20077 int
20078 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
20080 rtx addr1, addr2;
20081 unsigned int reg1, reg2;
20082 int offset1, offset2;
20084 /* The mems cannot be volatile. */
20085 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
20086 return 0;
20088 addr1 = XEXP (mem1, 0);
20089 addr2 = XEXP (mem2, 0);
20091 /* Extract an offset (if used) from the first addr. */
20092 if (GET_CODE (addr1) == PLUS)
20094 /* If not a REG, return zero. */
20095 if (GET_CODE (XEXP (addr1, 0)) != REG)
20096 return 0;
20097 else
20099 reg1 = REGNO (XEXP (addr1, 0));
20100 /* The offset must be constant! */
20101 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
20102 return 0;
20103 offset1 = INTVAL (XEXP (addr1, 1));
20106 else if (GET_CODE (addr1) != REG)
20107 return 0;
20108 else
20110 reg1 = REGNO (addr1);
20111 /* This was a simple (mem (reg)) expression. Offset is 0. */
20112 offset1 = 0;
20115 /* And now for the second addr. */
20116 if (GET_CODE (addr2) == PLUS)
20118 /* If not a REG, return zero. */
20119 if (GET_CODE (XEXP (addr2, 0)) != REG)
20120 return 0;
20121 else
20123 reg2 = REGNO (XEXP (addr2, 0));
20124 /* The offset must be constant. */
20125 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
20126 return 0;
20127 offset2 = INTVAL (XEXP (addr2, 1));
20130 else if (GET_CODE (addr2) != REG)
20131 return 0;
20132 else
20134 reg2 = REGNO (addr2);
20135 /* This was a simple (mem (reg)) expression. Offset is 0. */
20136 offset2 = 0;
20139 /* Both of these must have the same base register. */
20140 if (reg1 != reg2)
20141 return 0;
20143 /* The offset for the second addr must be 8 more than the first addr. */
20144 if (offset2 != offset1 + 8)
20145 return 0;
20147 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
20148 instructions. */
20149 return 1;
20153 rtx
20154 rs6000_secondary_memory_needed_rtx (machine_mode mode)
20156 static bool eliminated = false;
20157 rtx ret;
20159 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
20160 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20161 else
20163 rtx mem = cfun->machine->sdmode_stack_slot;
20164 gcc_assert (mem != NULL_RTX);
20166 if (!eliminated)
20168 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
20169 cfun->machine->sdmode_stack_slot = mem;
20170 eliminated = true;
20172 ret = mem;
20175 if (TARGET_DEBUG_ADDR)
20177 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
20178 GET_MODE_NAME (mode));
20179 if (!ret)
20180 fprintf (stderr, "\tNULL_RTX\n");
20181 else
20182 debug_rtx (ret);
20185 return ret;
20188 /* Return the mode to be used for memory when a secondary memory
20189 location is needed. For SDmode values we need to use DDmode, in
20190 all other cases we can use the same mode. */
20191 machine_mode
20192 rs6000_secondary_memory_needed_mode (machine_mode mode)
20194 if (lra_in_progress && mode == SDmode)
20195 return DDmode;
20196 return mode;
20199 static tree
20200 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
20202 /* Don't walk into types. */
20203 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
20205 *walk_subtrees = 0;
20206 return NULL_TREE;
20209 switch (TREE_CODE (*tp))
20211 case VAR_DECL:
20212 case PARM_DECL:
20213 case FIELD_DECL:
20214 case RESULT_DECL:
20215 case SSA_NAME:
20216 case REAL_CST:
20217 case MEM_REF:
20218 case VIEW_CONVERT_EXPR:
20219 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
20220 return *tp;
20221 break;
20222 default:
20223 break;
20226 return NULL_TREE;
20229 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
20230 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
20231 only work on the traditional altivec registers, note if an altivec register
20232 was chosen. */
20234 static enum rs6000_reg_type
20235 register_to_reg_type (rtx reg, bool *is_altivec)
20237 HOST_WIDE_INT regno;
20238 enum reg_class rclass;
20240 if (GET_CODE (reg) == SUBREG)
20241 reg = SUBREG_REG (reg);
20243 if (!REG_P (reg))
20244 return NO_REG_TYPE;
20246 regno = REGNO (reg);
20247 if (regno >= FIRST_PSEUDO_REGISTER)
20249 if (!lra_in_progress && !reload_in_progress && !reload_completed)
20250 return PSEUDO_REG_TYPE;
20252 regno = true_regnum (reg);
20253 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
20254 return PSEUDO_REG_TYPE;
20257 gcc_assert (regno >= 0);
20259 if (is_altivec && ALTIVEC_REGNO_P (regno))
20260 *is_altivec = true;
20262 rclass = rs6000_regno_regclass[regno];
20263 return reg_class_to_reg_type[(int)rclass];
20266 /* Helper function to return the cost of adding a TOC entry address. */
20268 static inline int
20269 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
20271 int ret;
20273 if (TARGET_CMODEL != CMODEL_SMALL)
20274 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
20276 else
20277 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
20279 return ret;
20282 /* Helper function for rs6000_secondary_reload to determine whether the memory
20283 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
20284 needs reloading. Return negative if the memory is not handled by the memory
20285 helper functions (so a different reload method should be tried), 0 if no
20286 additional instructions are needed, and positive to give the extra cost for the
20287 memory. */
20289 static int
20290 rs6000_secondary_reload_memory (rtx addr,
20291 enum reg_class rclass,
20292 machine_mode mode)
20294 int extra_cost = 0;
20295 rtx reg, and_arg, plus_arg0, plus_arg1;
20296 addr_mask_type addr_mask;
20297 const char *type = NULL;
20298 const char *fail_msg = NULL;
20300 if (GPR_REG_CLASS_P (rclass))
20301 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20303 else if (rclass == FLOAT_REGS)
20304 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20306 else if (rclass == ALTIVEC_REGS)
20307 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20309 /* For the combined VSX_REGS, turn off Altivec AND -16. */
20310 else if (rclass == VSX_REGS)
20311 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
20312 & ~RELOAD_REG_AND_M16);
20314 /* If the register allocator hasn't made up its mind yet on the register
20315 class to use, settle on defaults. */
20316 else if (rclass == NO_REGS)
20318 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
20319 & ~RELOAD_REG_AND_M16);
20321 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
20322 addr_mask &= ~(RELOAD_REG_INDEXED
20323 | RELOAD_REG_PRE_INCDEC
20324 | RELOAD_REG_PRE_MODIFY);
20327 else
20328 addr_mask = 0;
20330 /* If the register isn't valid in this register class, just return now. */
20331 if ((addr_mask & RELOAD_REG_VALID) == 0)
20333 if (TARGET_DEBUG_ADDR)
20335 fprintf (stderr,
20336 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20337 "not valid in class\n",
20338 GET_MODE_NAME (mode), reg_class_names[rclass]);
20339 debug_rtx (addr);
20342 return -1;
20345 switch (GET_CODE (addr))
20347 /* Does the register class support auto update forms for this mode? We
20348 don't need a scratch register, since the powerpc only supports
20349 PRE_INC, PRE_DEC, and PRE_MODIFY. */
20350 case PRE_INC:
20351 case PRE_DEC:
20352 reg = XEXP (addr, 0);
20353 if (!base_reg_operand (addr, GET_MODE (reg)))
20355 fail_msg = "no base register #1";
20356 extra_cost = -1;
20359 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20361 extra_cost = 1;
20362 type = "update";
20364 break;
20366 case PRE_MODIFY:
20367 reg = XEXP (addr, 0);
20368 plus_arg1 = XEXP (addr, 1);
20369 if (!base_reg_operand (reg, GET_MODE (reg))
20370 || GET_CODE (plus_arg1) != PLUS
20371 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
20373 fail_msg = "bad PRE_MODIFY";
20374 extra_cost = -1;
20377 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20379 extra_cost = 1;
20380 type = "update";
20382 break;
20384 /* Do we need to simulate AND -16 to clear the bottom address bits used
20385 in VMX load/stores? Only allow the AND for vector sizes. */
20386 case AND:
20387 and_arg = XEXP (addr, 0);
20388 if (GET_MODE_SIZE (mode) != 16
20389 || GET_CODE (XEXP (addr, 1)) != CONST_INT
20390 || INTVAL (XEXP (addr, 1)) != -16)
20392 fail_msg = "bad Altivec AND #1";
20393 extra_cost = -1;
20396 if (rclass != ALTIVEC_REGS)
20398 if (legitimate_indirect_address_p (and_arg, false))
20399 extra_cost = 1;
20401 else if (legitimate_indexed_address_p (and_arg, false))
20402 extra_cost = 2;
20404 else
20406 fail_msg = "bad Altivec AND #2";
20407 extra_cost = -1;
20410 type = "and";
20412 break;
20414 /* If this is an indirect address, make sure it is a base register. */
20415 case REG:
20416 case SUBREG:
20417 if (!legitimate_indirect_address_p (addr, false))
20419 extra_cost = 1;
20420 type = "move";
20422 break;
20424 /* If this is an indexed address, make sure the register class can handle
20425 indexed addresses for this mode. */
20426 case PLUS:
20427 plus_arg0 = XEXP (addr, 0);
20428 plus_arg1 = XEXP (addr, 1);
20430 /* (plus (plus (reg) (constant)) (constant)) is generated during
20431 push_reload processing, so handle it now. */
20432 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
20434 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20436 extra_cost = 1;
20437 type = "offset";
20441 /* (plus (plus (reg) (constant)) (reg)) is also generated during
20442 push_reload processing, so handle it now. */
20443 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
20445 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20447 extra_cost = 1;
20448 type = "indexed #2";
20452 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
20454 fail_msg = "no base register #2";
20455 extra_cost = -1;
20458 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
20460 if ((addr_mask & RELOAD_REG_INDEXED) == 0
20461 || !legitimate_indexed_address_p (addr, false))
20463 extra_cost = 1;
20464 type = "indexed";
20468 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
20469 && CONST_INT_P (plus_arg1))
20471 if (!quad_address_offset_p (INTVAL (plus_arg1)))
20473 extra_cost = 1;
20474 type = "vector d-form offset";
20478 /* Make sure the register class can handle offset addresses. */
20479 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20481 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20483 extra_cost = 1;
20484 type = "offset #2";
20488 else
20490 fail_msg = "bad PLUS";
20491 extra_cost = -1;
20494 break;
20496 case LO_SUM:
20497 /* Quad offsets are restricted and can't handle normal addresses. */
20498 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20500 extra_cost = -1;
20501 type = "vector d-form lo_sum";
20504 else if (!legitimate_lo_sum_address_p (mode, addr, false))
20506 fail_msg = "bad LO_SUM";
20507 extra_cost = -1;
20510 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20512 extra_cost = 1;
20513 type = "lo_sum";
20515 break;
20517 /* Static addresses need to create a TOC entry. */
20518 case CONST:
20519 case SYMBOL_REF:
20520 case LABEL_REF:
20521 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20523 extra_cost = -1;
20524 type = "vector d-form lo_sum #2";
20527 else
20529 type = "address";
20530 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
20532 break;
20534 /* TOC references look like offsetable memory. */
20535 case UNSPEC:
20536 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
20538 fail_msg = "bad UNSPEC";
20539 extra_cost = -1;
20542 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20544 extra_cost = -1;
20545 type = "vector d-form lo_sum #3";
20548 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20550 extra_cost = 1;
20551 type = "toc reference";
20553 break;
20555 default:
20557 fail_msg = "bad address";
20558 extra_cost = -1;
20562 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
20564 if (extra_cost < 0)
20565 fprintf (stderr,
20566 "rs6000_secondary_reload_memory error: mode = %s, "
20567 "class = %s, addr_mask = '%s', %s\n",
20568 GET_MODE_NAME (mode),
20569 reg_class_names[rclass],
20570 rs6000_debug_addr_mask (addr_mask, false),
20571 (fail_msg != NULL) ? fail_msg : "<bad address>");
20573 else
20574 fprintf (stderr,
20575 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20576 "addr_mask = '%s', extra cost = %d, %s\n",
20577 GET_MODE_NAME (mode),
20578 reg_class_names[rclass],
20579 rs6000_debug_addr_mask (addr_mask, false),
20580 extra_cost,
20581 (type) ? type : "<none>");
20583 debug_rtx (addr);
20586 return extra_cost;
20589 /* Helper function for rs6000_secondary_reload to return true if a move to a
20590 different register class is really a simple move. */
20592 static bool
20593 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20594 enum rs6000_reg_type from_type,
20595 machine_mode mode)
20597 int size = GET_MODE_SIZE (mode);
20599 /* Add support for various direct moves available. In this function, we only
20600 look at cases where we don't need any extra registers, and one or more
20601 simple move insns are issued. Originally small integers are not allowed
20602 in FPR/VSX registers. Single precision binary floating point is not a simple
20603 move because we need to convert to the single precision memory layout.
20604 The 4-byte SDmode can be moved. TDmode values are disallowed since they
20605 need special direct move handling, which we do not support yet. */
20606 if (TARGET_DIRECT_MOVE
20607 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20608 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
20610 if (TARGET_POWERPC64)
20612 /* ISA 2.07: MTVSRD or MFVSRD. */
20613 if (size == 8)
20614 return true;
20616 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
20617 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
20618 return true;
20621 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20622 if (TARGET_VSX_SMALL_INTEGER)
20624 if (mode == SImode)
20625 return true;
20627 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
20628 return true;
20631 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20632 if (mode == SDmode)
20633 return true;
20636 /* Power6+: MFTGPR or MFFGPR. */
20637 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
20638 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
20639 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20640 return true;
20642 /* Move to/from SPR. */
20643 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
20644 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
20645 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20646 return true;
20648 return false;
20651 /* Direct move helper function for rs6000_secondary_reload, handle all of the
20652 special direct moves that involve allocating an extra register, return the
20653 insn code of the helper function if there is such a function or
20654 CODE_FOR_nothing if not. */
20656 static bool
20657 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
20658 enum rs6000_reg_type from_type,
20659 machine_mode mode,
20660 secondary_reload_info *sri,
20661 bool altivec_p)
20663 bool ret = false;
20664 enum insn_code icode = CODE_FOR_nothing;
20665 int cost = 0;
20666 int size = GET_MODE_SIZE (mode);
20668 if (TARGET_POWERPC64 && size == 16)
20670 /* Handle moving 128-bit values from GPRs to VSX registers on
20671 ISA 2.07 (power8, power9) when running in 64-bit mode using
20672 XXPERMDI to glue the two 64-bit values back together. */
20673 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20675 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
20676 icode = reg_addr[mode].reload_vsx_gpr;
20679 /* Handle moving 128-bit values from VSX registers to GPRs on
20680 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
20681 bottom 64-bit value. */
20682 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20684 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
20685 icode = reg_addr[mode].reload_gpr_vsx;
20689 else if (TARGET_POWERPC64 && mode == SFmode)
20691 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20693 cost = 3; /* xscvdpspn, mfvsrd, and. */
20694 icode = reg_addr[mode].reload_gpr_vsx;
20697 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20699 cost = 2; /* mtvsrwz, xscvspdpn. */
20700 icode = reg_addr[mode].reload_vsx_gpr;
20704 else if (!TARGET_POWERPC64 && size == 8)
20706 /* Handle moving 64-bit values from GPRs to floating point registers on
20707 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
20708 32-bit values back together. Altivec register classes must be handled
20709 specially since a different instruction is used, and the secondary
20710 reload support requires a single instruction class in the scratch
20711 register constraint. However, right now TFmode is not allowed in
20712 Altivec registers, so the pattern will never match. */
20713 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
20715 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
20716 icode = reg_addr[mode].reload_fpr_gpr;
20720 if (icode != CODE_FOR_nothing)
20722 ret = true;
20723 if (sri)
20725 sri->icode = icode;
20726 sri->extra_cost = cost;
20730 return ret;
20733 /* Return whether a move between two register classes can be done either
20734 directly (simple move) or via a pattern that uses a single extra temporary
20735 (using ISA 2.07's direct move in this case). */
20737 static bool
20738 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
20739 enum rs6000_reg_type from_type,
20740 machine_mode mode,
20741 secondary_reload_info *sri,
20742 bool altivec_p)
20744 /* Fall back to load/store reloads if either type is not a register. */
20745 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
20746 return false;
20748 /* If we haven't allocated registers yet, assume the move can be done for the
20749 standard register types. */
20750 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
20751 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
20752 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
20753 return true;
20755 /* Moves to the same set of registers is a simple move for non-specialized
20756 registers. */
20757 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
20758 return true;
20760 /* Check whether a simple move can be done directly. */
20761 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
20763 if (sri)
20765 sri->icode = CODE_FOR_nothing;
20766 sri->extra_cost = 0;
20768 return true;
20771 /* Now check if we can do it in a few steps. */
20772 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
20773 altivec_p);
20776 /* Inform reload about cases where moving X with a mode MODE to a register in
20777 RCLASS requires an extra scratch or immediate register. Return the class
20778 needed for the immediate register.
20780 For VSX and Altivec, we may need a register to convert sp+offset into
20781 reg+sp.
20783 For misaligned 64-bit gpr loads and stores we need a register to
20784 convert an offset address to indirect. */
20786 static reg_class_t
20787 rs6000_secondary_reload (bool in_p,
20788 rtx x,
20789 reg_class_t rclass_i,
20790 machine_mode mode,
20791 secondary_reload_info *sri)
20793 enum reg_class rclass = (enum reg_class) rclass_i;
20794 reg_class_t ret = ALL_REGS;
20795 enum insn_code icode;
20796 bool default_p = false;
20797 bool done_p = false;
20799 /* Allow subreg of memory before/during reload. */
20800 bool memory_p = (MEM_P (x)
20801 || (!reload_completed && GET_CODE (x) == SUBREG
20802 && MEM_P (SUBREG_REG (x))));
20804 sri->icode = CODE_FOR_nothing;
20805 sri->t_icode = CODE_FOR_nothing;
20806 sri->extra_cost = 0;
20807 icode = ((in_p)
20808 ? reg_addr[mode].reload_load
20809 : reg_addr[mode].reload_store);
20811 if (REG_P (x) || register_operand (x, mode))
20813 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20814 bool altivec_p = (rclass == ALTIVEC_REGS);
20815 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20817 if (!in_p)
20818 std::swap (to_type, from_type);
20820 /* Can we do a direct move of some sort? */
20821 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20822 altivec_p))
20824 icode = (enum insn_code)sri->icode;
20825 default_p = false;
20826 done_p = true;
20827 ret = NO_REGS;
20831 /* Make sure 0.0 is not reloaded or forced into memory. */
20832 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20834 ret = NO_REGS;
20835 default_p = false;
20836 done_p = true;
20839 /* If this is a scalar floating point value and we want to load it into the
20840 traditional Altivec registers, do it via a move via a traditional floating
20841 point register, unless we have D-form addressing. Also make sure that
20842 non-zero constants use a FPR. */
20843 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20844 && !mode_supports_vmx_dform (mode)
20845 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20846 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20848 ret = FLOAT_REGS;
20849 default_p = false;
20850 done_p = true;
20853 /* Handle reload of load/stores if we have reload helper functions. */
20854 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20856 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20857 mode);
20859 if (extra_cost >= 0)
20861 done_p = true;
20862 ret = NO_REGS;
20863 if (extra_cost > 0)
20865 sri->extra_cost = extra_cost;
20866 sri->icode = icode;
20871 /* Handle unaligned loads and stores of integer registers. */
20872 if (!done_p && TARGET_POWERPC64
20873 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20874 && memory_p
20875 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20877 rtx addr = XEXP (x, 0);
20878 rtx off = address_offset (addr);
20880 if (off != NULL_RTX)
20882 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20883 unsigned HOST_WIDE_INT offset = INTVAL (off);
20885 /* We need a secondary reload when our legitimate_address_p
20886 says the address is good (as otherwise the entire address
20887 will be reloaded), and the offset is not a multiple of
20888 four or we have an address wrap. Address wrap will only
20889 occur for LO_SUMs since legitimate_offset_address_p
20890 rejects addresses for 16-byte mems that will wrap. */
20891 if (GET_CODE (addr) == LO_SUM
20892 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20893 && ((offset & 3) != 0
20894 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20895 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20896 && (offset & 3) != 0))
20898 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20899 if (in_p)
20900 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20901 : CODE_FOR_reload_di_load);
20902 else
20903 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20904 : CODE_FOR_reload_di_store);
20905 sri->extra_cost = 2;
20906 ret = NO_REGS;
20907 done_p = true;
20909 else
20910 default_p = true;
20912 else
20913 default_p = true;
20916 if (!done_p && !TARGET_POWERPC64
20917 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20918 && memory_p
20919 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20921 rtx addr = XEXP (x, 0);
20922 rtx off = address_offset (addr);
20924 if (off != NULL_RTX)
20926 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20927 unsigned HOST_WIDE_INT offset = INTVAL (off);
20929 /* We need a secondary reload when our legitimate_address_p
20930 says the address is good (as otherwise the entire address
20931 will be reloaded), and we have a wrap.
20933 legitimate_lo_sum_address_p allows LO_SUM addresses to
20934 have any offset so test for wrap in the low 16 bits.
20936 legitimate_offset_address_p checks for the range
20937 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20938 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20939 [0x7ff4,0x7fff] respectively, so test for the
20940 intersection of these ranges, [0x7ffc,0x7fff] and
20941 [0x7ff4,0x7ff7] respectively.
20943 Note that the address we see here may have been
20944 manipulated by legitimize_reload_address. */
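/* Numeric example (added commentary, assuming mode size 8 so that
   extra = 4): offset 0x7ffc is fine for the first word, but the
   second word would sit at 0x8000, beyond the signed 16-bit
   displacement; offsets 0x7ffc..0x7fff therefore take the secondary
   reload below, matching the [0x7ffc,0x7fff] range quoted above.  */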
20945 if (GET_CODE (addr) == LO_SUM
20946 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20947 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20949 if (in_p)
20950 sri->icode = CODE_FOR_reload_si_load;
20951 else
20952 sri->icode = CODE_FOR_reload_si_store;
20953 sri->extra_cost = 2;
20954 ret = NO_REGS;
20955 done_p = true;
20957 else
20958 default_p = true;
20960 else
20961 default_p = true;
20964 if (!done_p)
20965 default_p = true;
20967 if (default_p)
20968 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20970 gcc_assert (ret != ALL_REGS);
20972 if (TARGET_DEBUG_ADDR)
20974 fprintf (stderr,
20975 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20976 "mode = %s",
20977 reg_class_names[ret],
20978 in_p ? "true" : "false",
20979 reg_class_names[rclass],
20980 GET_MODE_NAME (mode));
20982 if (reload_completed)
20983 fputs (", after reload", stderr);
20985 if (!done_p)
20986 fputs (", done_p not set", stderr);
20988 if (default_p)
20989 fputs (", default secondary reload", stderr);
20991 if (sri->icode != CODE_FOR_nothing)
20992 fprintf (stderr, ", reload func = %s, extra cost = %d",
20993 insn_data[sri->icode].name, sri->extra_cost);
20995 else if (sri->extra_cost > 0)
20996 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20998 fputs ("\n", stderr);
20999 debug_rtx (x);
21002 return ret;
21005 /* Better tracing for rs6000_secondary_reload_inner. */
21007 static void
21008 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
21009 bool store_p)
21011 rtx set, clobber;
21013 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
21015 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
21016 store_p ? "store" : "load");
21018 if (store_p)
21019 set = gen_rtx_SET (mem, reg);
21020 else
21021 set = gen_rtx_SET (reg, mem);
21023 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
21024 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
21027 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
21028 ATTRIBUTE_NORETURN;
21030 static void
21031 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
21032 bool store_p)
21034 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
21035 gcc_unreachable ();
21038 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
21039 reload helper functions. These were identified in
21040 rs6000_secondary_reload_memory, and if reload decided to use the secondary
21041 reload, it calls the insns:
21042 reload_<RELOAD:mode>_<P:mptrsize>_store
21043 reload_<RELOAD:mode>_<P:mptrsize>_load
21045 which in turn calls this function, to do whatever is necessary to create
21046 valid addresses. */
21048 void
21049 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
21051 int regno = true_regnum (reg);
21052 machine_mode mode = GET_MODE (reg);
21053 addr_mask_type addr_mask;
21054 rtx addr;
21055 rtx new_addr;
21056 rtx op_reg, op0, op1;
21057 rtx and_op;
21058 rtx cc_clobber;
21059 rtvec rv;
21061 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
21062 || !base_reg_operand (scratch, GET_MODE (scratch)))
21063 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21065 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
21066 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21068 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
21069 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21071 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
21072 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21074 else
21075 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21077 /* Make sure the mode is valid in this register class. */
21078 if ((addr_mask & RELOAD_REG_VALID) == 0)
21079 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21081 if (TARGET_DEBUG_ADDR)
21082 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
21084 new_addr = addr = XEXP (mem, 0);
21085 switch (GET_CODE (addr))
21087 /* Does the register class support auto update forms for this mode? If
21088 not, do the update now. We don't need a scratch register, since the
21089 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
21090 case PRE_INC:
21091 case PRE_DEC:
21092 op_reg = XEXP (addr, 0);
21093 if (!base_reg_operand (op_reg, Pmode))
21094 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21096 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21098 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
21099 new_addr = op_reg;
21101 break;
21103 case PRE_MODIFY:
21104 op0 = XEXP (addr, 0);
21105 op1 = XEXP (addr, 1);
21106 if (!base_reg_operand (op0, Pmode)
21107 || GET_CODE (op1) != PLUS
21108 || !rtx_equal_p (op0, XEXP (op1, 0)))
21109 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21111 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21113 emit_insn (gen_rtx_SET (op0, op1));
21114 new_addr = reg;
21116 break;
21118 /* Do we need to simulate AND -16 to clear the bottom address bits used
21119 in VMX load/stores? */
21120 case AND:
21121 op0 = XEXP (addr, 0);
21122 op1 = XEXP (addr, 1);
21123 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
21125 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
21126 op_reg = op0;
21128 else if (GET_CODE (op1) == PLUS)
21130 emit_insn (gen_rtx_SET (scratch, op1));
21131 op_reg = scratch;
21134 else
21135 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21137 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
21138 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
21139 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
21140 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
21141 new_addr = scratch;
21143 break;
21145 /* If this is an indirect address, make sure it is a base register. */
21146 case REG:
21147 case SUBREG:
21148 if (!base_reg_operand (addr, GET_MODE (addr)))
21150 emit_insn (gen_rtx_SET (scratch, addr));
21151 new_addr = scratch;
21153 break;
21155 /* If this is an indexed address, make sure the register class can handle
21156 indexed addresses for this mode. */
21157 case PLUS:
21158 op0 = XEXP (addr, 0);
21159 op1 = XEXP (addr, 1);
21160 if (!base_reg_operand (op0, Pmode))
21161 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21163 else if (int_reg_operand (op1, Pmode))
21165 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21167 emit_insn (gen_rtx_SET (scratch, addr));
21168 new_addr = scratch;
21172 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
21174 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
21175 || !quad_address_p (addr, mode, false))
21177 emit_insn (gen_rtx_SET (scratch, addr));
21178 new_addr = scratch;
21182 /* Make sure the register class can handle offset addresses. */
21183 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21185 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21187 emit_insn (gen_rtx_SET (scratch, addr));
21188 new_addr = scratch;
21192 else
21193 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21195 break;
21197 case LO_SUM:
21198 op0 = XEXP (addr, 0);
21199 op1 = XEXP (addr, 1);
21200 if (!base_reg_operand (op0, Pmode))
21201 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21203 else if (int_reg_operand (op1, Pmode))
21205 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21207 emit_insn (gen_rtx_SET (scratch, addr));
21208 new_addr = scratch;
21212 /* Quad offsets are restricted and can't handle normal addresses. */
21213 else if (mode_supports_vsx_dform_quad (mode))
21215 emit_insn (gen_rtx_SET (scratch, addr));
21216 new_addr = scratch;
21219 /* Make sure the register class can handle offset addresses. */
21220 else if (legitimate_lo_sum_address_p (mode, addr, false))
21222 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21224 emit_insn (gen_rtx_SET (scratch, addr));
21225 new_addr = scratch;
21229 else
21230 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21232 break;
21234 case SYMBOL_REF:
21235 case CONST:
21236 case LABEL_REF:
21237 rs6000_emit_move (scratch, addr, Pmode);
21238 new_addr = scratch;
21239 break;
21241 default:
21242 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21245 /* Adjust the address if it changed. */
21246 if (addr != new_addr)
21248 mem = replace_equiv_address_nv (mem, new_addr);
21249 if (TARGET_DEBUG_ADDR)
21250 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
21253 /* Now create the move. */
21254 if (store_p)
21255 emit_insn (gen_rtx_SET (mem, reg));
21256 else
21257 emit_insn (gen_rtx_SET (reg, mem));
21259 return;
21262 /* Convert reloads involving 64-bit gprs and misaligned offset
21263 addressing, or multiple 32-bit gprs and offsets that are too large,
21264 to use indirect addressing. */
21266 void
21267 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
21269 int regno = true_regnum (reg);
21270 enum reg_class rclass;
21271 rtx addr;
21272 rtx scratch_or_premodify = scratch;
21274 if (TARGET_DEBUG_ADDR)
21276 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
21277 store_p ? "store" : "load");
21278 fprintf (stderr, "reg:\n");
21279 debug_rtx (reg);
21280 fprintf (stderr, "mem:\n");
21281 debug_rtx (mem);
21282 fprintf (stderr, "scratch:\n");
21283 debug_rtx (scratch);
21286 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
21287 gcc_assert (GET_CODE (mem) == MEM);
21288 rclass = REGNO_REG_CLASS (regno);
21289 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
21290 addr = XEXP (mem, 0);
21292 if (GET_CODE (addr) == PRE_MODIFY)
21294 gcc_assert (REG_P (XEXP (addr, 0))
21295 && GET_CODE (XEXP (addr, 1)) == PLUS
21296 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
21297 scratch_or_premodify = XEXP (addr, 0);
21298 if (!HARD_REGISTER_P (scratch_or_premodify))
21299 /* If we have a pseudo here then reload will have arranged
21300 to have it replaced, but only in the original insn.
21301 Use the replacement here too. */
21302 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21304 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
21305 expressions from the original insn, without unsharing them.
21306 Any RTL that points into the original insn will of course
21307 have register replacements applied. That is why we don't
21308 need to look for replacements under the PLUS. */
21309 addr = XEXP (addr, 1);
21311 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
21313 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
21315 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
21317 /* Now create the move. */
21318 if (store_p)
21319 emit_insn (gen_rtx_SET (mem, reg));
21320 else
21321 emit_insn (gen_rtx_SET (reg, mem));
21323 return;
21326 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
21327 this function has any SDmode references. If we are on a power7 or later, we
21328 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
21329 can load/store the value. */
21331 static void
21332 rs6000_alloc_sdmode_stack_slot (void)
21334 tree t;
21335 basic_block bb;
21336 gimple_stmt_iterator gsi;
21338 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
21339 /* We use a different approach for dealing with the secondary
21340 memory in LRA. */
21341 if (ira_use_lra_p)
21342 return;
21344 if (TARGET_NO_SDMODE_STACK)
21345 return;
21347 FOR_EACH_BB_FN (bb, cfun)
21348 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21350 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
21351 if (ret)
21353 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21354 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21355 SDmode, 0);
21356 return;
21360 /* Check for any SDmode parameters of the function. */
21361 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
21363 if (TREE_TYPE (t) == error_mark_node)
21364 continue;
21366 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
21367 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
21369 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21370 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21371 SDmode, 0);
21372 return;
21377 static void
21378 rs6000_instantiate_decls (void)
21380 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
21381 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
21384 /* Given an rtx X being reloaded into a reg required to be
21385 in class CLASS, return the class of reg to actually use.
21386 In general this is just CLASS; but on some machines
21387 in some cases it is preferable to use a more restrictive class.
21389 On the RS/6000, we have to return NO_REGS when we want to reload a
21390 floating-point CONST_DOUBLE to force it to be copied to memory.
21392 We also don't want to reload integer values into floating-point
21393 registers if we can at all help it. In fact, this can
21394 cause reload to die, if it tries to generate a reload of CTR
21395 into a FP register and discovers it doesn't have the memory location
21396 required.
21398 ??? Would it be a good idea to have reload do the converse, that is
21399 try to reload floating modes into FP registers if possible?
21400 */
21402 static enum reg_class
21403 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
21405 machine_mode mode = GET_MODE (x);
21406 bool is_constant = CONSTANT_P (x);
21408 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
21409 reload class for it. */
21410 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21411 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
21412 return NO_REGS;
21414 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
21415 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
21416 return NO_REGS;
21418 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
21419 the reloading of address expressions using PLUS into floating point
21420 registers. */
21421 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
21423 if (is_constant)
21425 /* Zero is always allowed in all VSX registers. */
21426 if (x == CONST0_RTX (mode))
21427 return rclass;
21429 /* If this is a vector constant that can be formed with a few Altivec
21430 instructions, we want altivec registers. */
21431 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
21432 return ALTIVEC_REGS;
21434 /* If this is an integer constant that can easily be loaded into
21435 vector registers, allow it. */
21436 if (CONST_INT_P (x))
21438 HOST_WIDE_INT value = INTVAL (x);
21440 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
21441 2.06 can generate it in the Altivec registers with
21442 VSPLTI<x>. */
21443 if (value == -1)
21445 if (TARGET_P8_VECTOR)
21446 return rclass;
21447 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21448 return ALTIVEC_REGS;
21449 else
21450 return NO_REGS;
21453 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
21454 a sign extend in the Altivec registers. */
21455 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
21456 && TARGET_VSX_SMALL_INTEGER
21457 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
21458 return ALTIVEC_REGS;
21461 /* Force constant to memory. */
21462 return NO_REGS;
21465 /* D-form addressing can easily reload the value. */
21466 if (mode_supports_vmx_dform (mode)
21467 || mode_supports_vsx_dform_quad (mode))
21468 return rclass;
21470 /* If this is a scalar floating point value and we don't have D-form
21471 addressing, prefer the traditional floating point registers so that we
21472 can use D-form (register+offset) addressing. */
21473 if (rclass == VSX_REGS
21474 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
21475 return FLOAT_REGS;
21477 /* Prefer the Altivec registers if Altivec is handling the vector
21478 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
21479 loads. */
21480 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
21481 || mode == V1TImode)
21482 return ALTIVEC_REGS;
21484 return rclass;
21487 if (is_constant || GET_CODE (x) == PLUS)
21489 if (reg_class_subset_p (GENERAL_REGS, rclass))
21490 return GENERAL_REGS;
21491 if (reg_class_subset_p (BASE_REGS, rclass))
21492 return BASE_REGS;
21493 return NO_REGS;
21496 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21497 return GENERAL_REGS;
21499 return rclass;
21502 /* Debug version of rs6000_preferred_reload_class. */
21503 static enum reg_class
21504 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
21506 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
21508 fprintf (stderr,
21509 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
21510 "mode = %s, x:\n",
21511 reg_class_names[ret], reg_class_names[rclass],
21512 GET_MODE_NAME (GET_MODE (x)));
21513 debug_rtx (x);
21515 return ret;
21518 /* If we are copying between FP or AltiVec registers and anything else, we need
21519 a memory location. The exception is when we are targeting ppc64 and the
21520 direct move instructions between FPRs and GPRs are available. Also, under VSX, you
21521 can copy vector registers from the FP register set to the Altivec register
21522 set and vice versa. */
21524 static bool
21525 rs6000_secondary_memory_needed (enum reg_class from_class,
21526 enum reg_class to_class,
21527 machine_mode mode)
21529 enum rs6000_reg_type from_type, to_type;
21530 bool altivec_p = ((from_class == ALTIVEC_REGS)
21531 || (to_class == ALTIVEC_REGS));
21533 /* If a simple/direct move is available, we don't need secondary memory */
21534 from_type = reg_class_to_reg_type[(int)from_class];
21535 to_type = reg_class_to_reg_type[(int)to_class];
21537 if (rs6000_secondary_reload_move (to_type, from_type, mode,
21538 (secondary_reload_info *)0, altivec_p))
21539 return false;
21541 /* If we have a floating point or vector register class, we need to use
21542 memory to transfer the data. */
21543 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21544 return true;
21546 return false;
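/* Example (illustrative): copying a DFmode value between FLOAT_REGS and
   GENERAL_REGS needs a stack slot on older processors; when the ISA 2.07
   direct move instructions are available, rs6000_secondary_reload_move
   reports that a register-to-register sequence suffices and we return
   false above.  */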
21549 /* Debug version of rs6000_secondary_memory_needed. */
21550 static bool
21551 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21552 enum reg_class to_class,
21553 machine_mode mode)
21555 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21557 fprintf (stderr,
21558 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21559 "to_class = %s, mode = %s\n",
21560 ret ? "true" : "false",
21561 reg_class_names[from_class],
21562 reg_class_names[to_class],
21563 GET_MODE_NAME (mode));
21565 return ret;
21568 /* Return the register class of a scratch register needed to copy IN into
21569 or out of a register in RCLASS in MODE. If it can be done directly,
21570 NO_REGS is returned. */
21572 static enum reg_class
21573 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
21574 rtx in)
21576 int regno;
21578 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
21579 #if TARGET_MACHO
21580 && MACHOPIC_INDIRECT
21581 #endif
21584 /* We cannot copy a symbolic operand directly into anything
21585 other than BASE_REGS for TARGET_ELF. So indicate that a
21586 register from BASE_REGS is needed as an intermediate
21587 register.
21589 On Darwin, pic addresses require a load from memory, which
21590 needs a base register. */
21591 if (rclass != BASE_REGS
21592 && (GET_CODE (in) == SYMBOL_REF
21593 || GET_CODE (in) == HIGH
21594 || GET_CODE (in) == LABEL_REF
21595 || GET_CODE (in) == CONST))
21596 return BASE_REGS;
21599 if (GET_CODE (in) == REG)
21601 regno = REGNO (in);
21602 if (regno >= FIRST_PSEUDO_REGISTER)
21604 regno = true_regnum (in);
21605 if (regno >= FIRST_PSEUDO_REGISTER)
21606 regno = -1;
21609 else if (GET_CODE (in) == SUBREG)
21611 regno = true_regnum (in);
21612 if (regno >= FIRST_PSEUDO_REGISTER)
21613 regno = -1;
21615 else
21616 regno = -1;
21618 /* If we have VSX register moves, prefer moving scalar values between
21619 Altivec registers and GPRs by going via an FPR (and then via memory)
21620 instead of reloading the secondary memory address for Altivec moves. */
21621 if (TARGET_VSX
21622 && GET_MODE_SIZE (mode) < 16
21623 && !mode_supports_vmx_dform (mode)
21624 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
21625 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
21626 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21627 && (regno >= 0 && INT_REGNO_P (regno)))))
21628 return FLOAT_REGS;
21630 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
21631 into anything. */
21632 if (rclass == GENERAL_REGS || rclass == BASE_REGS
21633 || (regno >= 0 && INT_REGNO_P (regno)))
21634 return NO_REGS;
21636 /* Constants, memory, and VSX registers can go into VSX registers (both the
21637 traditional floating point and the altivec registers). */
21638 if (rclass == VSX_REGS
21639 && (regno == -1 || VSX_REGNO_P (regno)))
21640 return NO_REGS;
21642 /* Constants, memory, and FP registers can go into FP registers. */
21643 if ((regno == -1 || FP_REGNO_P (regno))
21644 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21645 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21647 /* Memory, and AltiVec registers can go into AltiVec registers. */
21648 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21649 && rclass == ALTIVEC_REGS)
21650 return NO_REGS;
21652 /* We can copy among the CR registers. */
21653 if ((rclass == CR_REGS || rclass == CR0_REGS)
21654 && regno >= 0 && CR_REGNO_P (regno))
21655 return NO_REGS;
21657 /* Otherwise, we need GENERAL_REGS. */
21658 return GENERAL_REGS;
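/* Worked example (illustrative): on ELF, reloading the address of a
   global symbol into FLOAT_REGS first requires a BASE_REGS scratch,
   since symbolic operands can only be materialized in a base register;
   reloading an 8-byte value between a GPR and ALTIVEC_REGS under VSX
   instead returns FLOAT_REGS so the value can travel via an FPR.  */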
21661 /* Debug version of rs6000_secondary_reload_class. */
21662 static enum reg_class
21663 rs6000_debug_secondary_reload_class (enum reg_class rclass,
21664 machine_mode mode, rtx in)
21666 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
21667 fprintf (stderr,
21668 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
21669 "mode = %s, input rtx:\n",
21670 reg_class_names[ret], reg_class_names[rclass],
21671 GET_MODE_NAME (mode));
21672 debug_rtx (in);
21674 return ret;
21677 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
21679 static bool
21680 rs6000_cannot_change_mode_class (machine_mode from,
21681 machine_mode to,
21682 enum reg_class rclass)
21684 unsigned from_size = GET_MODE_SIZE (from);
21685 unsigned to_size = GET_MODE_SIZE (to);
21687 if (from_size != to_size)
21689 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21691 if (reg_classes_intersect_p (xclass, rclass))
21693 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21694 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21695 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
21696 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
21698 /* Don't allow 64-bit types to overlap with 128-bit types that take a
21699 single register under VSX because the scalar part of the register
21700 is in the upper 64 bits, not the lower 64 bits. Types like
21701 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
21702 IEEE floating point can't overlap, and neither can small
21703 values. */
21705 if (to_float128_vector_p && from_float128_vector_p)
21706 return false;
21708 else if (to_float128_vector_p || from_float128_vector_p)
21709 return true;
21711 /* TDmode in floating-mode registers must always go into a register
21712 pair with the most significant word in the even-numbered register
21713 to match ISA requirements. In little-endian mode, this does not
21714 match subreg numbering, so we cannot allow subregs. */
21715 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21716 return true;
21718 if (from_size < 8 || to_size < 8)
21719 return true;
21721 if (from_size == 8 && (8 * to_nregs) != to_size)
21722 return true;
21724 if (to_size == 8 && (8 * from_nregs) != from_size)
21725 return true;
21727 return false;
21729 else
21730 return false;
21733 if (TARGET_E500_DOUBLE
21734 && ((((to) == DFmode) + ((from) == DFmode)) == 1
21735 || (((to) == TFmode) + ((from) == TFmode)) == 1
21736 || (((to) == IFmode) + ((from) == IFmode)) == 1
21737 || (((to) == KFmode) + ((from) == KFmode)) == 1
21738 || (((to) == DDmode) + ((from) == DDmode)) == 1
21739 || (((to) == TDmode) + ((from) == TDmode)) == 1
21740 || (((to) == DImode) + ((from) == DImode)) == 1))
21741 return true;
21743 /* Since the VSX register set includes traditional floating point registers
21744 and altivec registers, just check for the size being different instead of
21745 trying to check whether the modes are vector modes. Otherwise it won't
21746 allow say DF and DI to change classes. For types like TFmode and TDmode
21747 that take 2 64-bit registers, rather than a single 128-bit register, don't
21748 allow subregs of those types to other 128 bit types. */
21749 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21751 unsigned num_regs = (from_size + 15) / 16;
21752 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21753 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21754 return true;
21756 return (from_size != 8 && from_size != 16);
21759 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21760 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21761 return true;
21763 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
21764 && reg_classes_intersect_p (GENERAL_REGS, rclass))
21765 return true;
21767 return false;
21770 /* Debug version of rs6000_cannot_change_mode_class. */
21771 static bool
21772 rs6000_debug_cannot_change_mode_class (machine_mode from,
21773 machine_mode to,
21774 enum reg_class rclass)
21776 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
21778 fprintf (stderr,
21779 "rs6000_cannot_change_mode_class, return %s, from = %s, "
21780 "to = %s, rclass = %s\n",
21781 ret ? "true" : "false",
21782 GET_MODE_NAME (from), GET_MODE_NAME (to),
21783 reg_class_names[rclass]);
21785 return ret;
21788 /* Return a string to do a move operation of 128 bits of data. */
21790 const char *
21791 rs6000_output_move_128bit (rtx operands[])
21793 rtx dest = operands[0];
21794 rtx src = operands[1];
21795 machine_mode mode = GET_MODE (dest);
21796 int dest_regno;
21797 int src_regno;
21798 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21799 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21801 if (REG_P (dest))
21803 dest_regno = REGNO (dest);
21804 dest_gpr_p = INT_REGNO_P (dest_regno);
21805 dest_fp_p = FP_REGNO_P (dest_regno);
21806 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21807 dest_vsx_p = dest_fp_p | dest_vmx_p;
21809 else
21811 dest_regno = -1;
21812 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21815 if (REG_P (src))
21817 src_regno = REGNO (src);
21818 src_gpr_p = INT_REGNO_P (src_regno);
21819 src_fp_p = FP_REGNO_P (src_regno);
21820 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21821 src_vsx_p = src_fp_p | src_vmx_p;
21823 else
21825 src_regno = -1;
21826 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21829 /* Register moves. */
21830 if (dest_regno >= 0 && src_regno >= 0)
21832 if (dest_gpr_p)
21834 if (src_gpr_p)
21835 return "#";
21837 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21838 return (WORDS_BIG_ENDIAN
21839 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21840 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21842 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21843 return "#";
21846 else if (TARGET_VSX && dest_vsx_p)
21848 if (src_vsx_p)
21849 return "xxlor %x0,%x1,%x1";
21851 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21852 return (WORDS_BIG_ENDIAN
21853 ? "mtvsrdd %x0,%1,%L1"
21854 : "mtvsrdd %x0,%L1,%1");
21856 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21857 return "#";
21860 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21861 return "vor %0,%1,%1";
21863 else if (dest_fp_p && src_fp_p)
21864 return "#";
21867 /* Loads. */
21868 else if (dest_regno >= 0 && MEM_P (src))
21870 if (dest_gpr_p)
21872 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21873 return "lq %0,%1";
21874 else
21875 return "#";
21878 else if (TARGET_ALTIVEC && dest_vmx_p
21879 && altivec_indexed_or_indirect_operand (src, mode))
21880 return "lvx %0,%y1";
21882 else if (TARGET_VSX && dest_vsx_p)
21884 if (mode_supports_vsx_dform_quad (mode)
21885 && quad_address_p (XEXP (src, 0), mode, true))
21886 return "lxv %x0,%1";
21888 else if (TARGET_P9_VECTOR)
21889 return "lxvx %x0,%y1";
21891 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21892 return "lxvw4x %x0,%y1";
21894 else
21895 return "lxvd2x %x0,%y1";
21898 else if (TARGET_ALTIVEC && dest_vmx_p)
21899 return "lvx %0,%y1";
21901 else if (dest_fp_p)
21902 return "#";
21905 /* Stores. */
21906 else if (src_regno >= 0 && MEM_P (dest))
21908 if (src_gpr_p)
21910 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21911 return "stq %1,%0";
21912 else
21913 return "#";
21916 else if (TARGET_ALTIVEC && src_vmx_p
21917 && altivec_indexed_or_indirect_operand (dest, mode))
21918 return "stvx %1,%y0";
21920 else if (TARGET_VSX && src_vsx_p)
21922 if (mode_supports_vsx_dform_quad (mode)
21923 && quad_address_p (XEXP (dest, 0), mode, true))
21924 return "stxv %x1,%0";
21926 else if (TARGET_P9_VECTOR)
21927 return "stxvx %x1,%y0";
21929 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21930 return "stxvw4x %x1,%y0";
21932 else
21933 return "stxvd2x %x1,%y0";
21936 else if (TARGET_ALTIVEC && src_vmx_p)
21937 return "stvx %1,%y0";
21939 else if (src_fp_p)
21940 return "#";
21943 /* Constants. */
21944 else if (dest_regno >= 0
21945 && (GET_CODE (src) == CONST_INT
21946 || GET_CODE (src) == CONST_WIDE_INT
21947 || GET_CODE (src) == CONST_DOUBLE
21948 || GET_CODE (src) == CONST_VECTOR))
21950 if (dest_gpr_p)
21951 return "#";
21953 else if ((dest_vmx_p && TARGET_ALTIVEC)
21954 || (dest_vsx_p && TARGET_VSX))
21955 return output_vec_const_move (operands);
21958 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
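/* Sample outputs (illustrative): a VSX-to-VSX copy yields
   "xxlor %x0,%x1,%x1", an Altivec-to-Altivec copy yields
   "vor %0,%1,%1", and a GPR-to-GPR move returns "#" so a splitter can
   break it into word-sized moves after reload.  */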
21961 /* Validate a 128-bit move. */
21962 bool
21963 rs6000_move_128bit_ok_p (rtx operands[])
21965 machine_mode mode = GET_MODE (operands[0]);
21966 return (gpc_reg_operand (operands[0], mode)
21967 || gpc_reg_operand (operands[1], mode));
21970 /* Return true if a 128-bit move needs to be split. */
21971 bool
21972 rs6000_split_128bit_ok_p (rtx operands[])
21974 if (!reload_completed)
21975 return false;
21977 if (!gpr_or_gpr_p (operands[0], operands[1]))
21978 return false;
21980 if (quad_load_store_p (operands[0], operands[1]))
21981 return false;
21983 return true;
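/* For example (illustrative): a TImode move between two GPR pairs is
   emitted as "#" by rs6000_output_move_128bit and split here after
   reload into individual doubleword moves, unless TARGET_QUAD_MEMORY
   lets a single lq/stq do the transfer.  */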
21987 /* Given a comparison operation, return the bit number in CCR to test. We
21988 know this is a valid comparison.
21990 SCC_P is 1 if this is for an scc. That means that %D will have been
21991 used instead of %C, so the bits will be in different places.
21993 Return -1 if OP isn't a valid comparison for some reason. */
21996 ccr_bit (rtx op, int scc_p)
21998 enum rtx_code code = GET_CODE (op);
21999 machine_mode cc_mode;
22000 int cc_regnum;
22001 int base_bit;
22002 rtx reg;
22004 if (!COMPARISON_P (op))
22005 return -1;
22007 reg = XEXP (op, 0);
22009 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
22011 cc_mode = GET_MODE (reg);
22012 cc_regnum = REGNO (reg);
22013 base_bit = 4 * (cc_regnum - CR0_REGNO);
22015 validate_condition_mode (code, cc_mode);
22017 /* When generating a sCOND operation, only positive conditions are
22018 allowed. */
22019 gcc_assert (!scc_p
22020 || code == EQ || code == GT || code == LT || code == UNORDERED
22021 || code == GTU || code == LTU);
22023 switch (code)
22025 case NE:
22026 return scc_p ? base_bit + 3 : base_bit + 2;
22027 case EQ:
22028 return base_bit + 2;
22029 case GT: case GTU: case UNLE:
22030 return base_bit + 1;
22031 case LT: case LTU: case UNGE:
22032 return base_bit;
22033 case ORDERED: case UNORDERED:
22034 return base_bit + 3;
22036 case GE: case GEU:
22037 /* If scc, we will have done a cror to put the bit in the
22038 unordered position. So test that bit. For integer, this is ! LT
22039 unless this is an scc insn. */
22040 return scc_p ? base_bit + 3 : base_bit;
22042 case LE: case LEU:
22043 return scc_p ? base_bit + 3 : base_bit + 1;
22045 default:
22046 gcc_unreachable ();
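/* Worked example (illustrative): a comparison held in CR field 2 has
   base_bit = 4 * 2 = 8, so an EQ test reads CR bit 10 (base_bit + 2);
   for an scc, an NE test instead reads bit 11 (base_bit + 3), where the
   cror mentioned above will have placed the result.  */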
22050 /* Return the GOT register. */
22053 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
22055 /* The second flow pass currently (June 1999) can't update
22056 regs_ever_live without disturbing other parts of the compiler, so
22057 update it here to make the prolog/epilogue code happy. */
22058 if (!can_create_pseudo_p ()
22059 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
22060 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
22062 crtl->uses_pic_offset_table = 1;
22064 return pic_offset_table_rtx;
22067 static rs6000_stack_t stack_info;
22069 /* Function to init struct machine_function.
22070 This will be called, via a pointer variable,
22071 from push_function_context. */
22073 static struct machine_function *
22074 rs6000_init_machine_status (void)
22076 stack_info.reload_completed = 0;
22077 return ggc_cleared_alloc<machine_function> ();
22080 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
22082 /* Write out a function code label. */
22084 void
22085 rs6000_output_function_entry (FILE *file, const char *fname)
22087 if (fname[0] != '.')
22089 switch (DEFAULT_ABI)
22091 default:
22092 gcc_unreachable ();
22094 case ABI_AIX:
22095 if (DOT_SYMBOLS)
22096 putc ('.', file);
22097 else
22098 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
22099 break;
22101 case ABI_ELFv2:
22102 case ABI_V4:
22103 case ABI_DARWIN:
22104 break;
22108 RS6000_OUTPUT_BASENAME (file, fname);
22111 /* Print an operand. Recognize special options, documented below. */
22113 #if TARGET_ELF
22114 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
22115 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
22116 #else
22117 #define SMALL_DATA_RELOC "sda21"
22118 #define SMALL_DATA_REG 0
22119 #endif
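/* Illustrative output (assuming -msdata=eabi): a small-data reference to
   "var" prints as "var@sda21(0)"; other sysv small-data configurations
   print "var@sdarel(13)", using r13 as the small-data base register.  */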
22121 void
22122 print_operand (FILE *file, rtx x, int code)
22124 int i;
22125 unsigned HOST_WIDE_INT uval;
22127 switch (code)
22129 /* %a is output_address. */
22131 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
22132 output_operand. */
22134 case 'D':
22135 /* Like 'J' but get to the GT bit only. */
22136 gcc_assert (REG_P (x));
22138 /* Bit 1 is GT bit. */
22139 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
22141 /* Add one for shift count in rlinm for scc. */
22142 fprintf (file, "%d", i + 1);
22143 return;
22145 case 'e':
22146 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
22147 if (! INT_P (x))
22149 output_operand_lossage ("invalid %%e value");
22150 return;
22153 uval = INTVAL (x);
22154 if ((uval & 0xffff) == 0 && uval != 0)
22155 putc ('s', file);
22156 return;
22158 case 'E':
22159 /* X is a CR register. Print the number of the EQ bit of the CR */
22160 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22161 output_operand_lossage ("invalid %%E value");
22162 else
22163 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
22164 return;
22166 case 'f':
22167 /* X is a CR register. Print the shift count needed to move it
22168 to the high-order four bits. */
22169 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22170 output_operand_lossage ("invalid %%f value");
22171 else
22172 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
22173 return;
22175 case 'F':
22176 /* Similar, but print the count for the rotate in the opposite
22177 direction. */
22178 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22179 output_operand_lossage ("invalid %%F value");
22180 else
22181 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
22182 return;
22184 case 'G':
22185 /* X is a constant integer. If it is negative, print "m",
22186 otherwise print "z". This is to make an aze or ame insn. */
22187 if (GET_CODE (x) != CONST_INT)
22188 output_operand_lossage ("invalid %%G value");
22189 else if (INTVAL (x) >= 0)
22190 putc ('z', file);
22191 else
22192 putc ('m', file);
22193 return;
22195 case 'h':
22196 /* If constant, output low-order five bits. Otherwise, write
22197 normally. */
22198 if (INT_P (x))
22199 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
22200 else
22201 print_operand (file, x, 0);
22202 return;
22204 case 'H':
22205 /* If constant, output low-order six bits. Otherwise, write
22206 normally. */
22207 if (INT_P (x))
22208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
22209 else
22210 print_operand (file, x, 0);
22211 return;
22213 case 'I':
22214 /* Print `i' if this is a constant, else nothing. */
22215 if (INT_P (x))
22216 putc ('i', file);
22217 return;
22219 case 'j':
22220 /* Write the bit number in CCR for jump. */
22221 i = ccr_bit (x, 0);
22222 if (i == -1)
22223 output_operand_lossage ("invalid %%j code");
22224 else
22225 fprintf (file, "%d", i);
22226 return;
22228 case 'J':
22229 /* Similar, but add one for shift count in rlinm for scc and pass
22230 scc flag to `ccr_bit'. */
22231 i = ccr_bit (x, 1);
22232 if (i == -1)
22233 output_operand_lossage ("invalid %%J code");
22234 else
22235 /* If we want bit 31, write a shift count of zero, not 32. */
22236 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22237 return;
22239 case 'k':
22240 /* X must be a constant. Write the 1's complement of the
22241 constant. */
22242 if (! INT_P (x))
22243 output_operand_lossage ("invalid %%k value");
22244 else
22245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
22246 return;
22248 case 'K':
22249 /* X must be a symbolic constant on ELF. Write an
22250 expression suitable for an 'addi' that adds in the low 16
22251 bits of the MEM. */
22252 if (GET_CODE (x) == CONST)
22254 if (GET_CODE (XEXP (x, 0)) != PLUS
22255 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
22256 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
22257 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
22258 output_operand_lossage ("invalid %%K value");
22260 print_operand_address (file, x);
22261 fputs ("@l", file);
22262 return;
22264 /* %l is output_asm_label. */
22266 case 'L':
22267 /* Write second word of DImode or DFmode reference. Works on register
22268 or non-indexed memory only. */
22269 if (REG_P (x))
22270 fputs (reg_names[REGNO (x) + 1], file);
22271 else if (MEM_P (x))
22273 machine_mode mode = GET_MODE (x);
22274 /* Handle possible auto-increment. Since it is pre-increment and
22275 we have already done it, we can just use an offset of one word. */
22276 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22277 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22278 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22279 UNITS_PER_WORD));
22280 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22281 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22282 UNITS_PER_WORD));
22283 else
22284 output_address (mode, XEXP (adjust_address_nv (x, SImode,
22285 UNITS_PER_WORD),
22286 0));
22288 if (small_data_operand (x, GET_MODE (x)))
22289 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22290 reg_names[SMALL_DATA_REG]);
22292 return;
22294 case 'N':
22295 /* Write the number of elements in the vector times 4. */
22296 if (GET_CODE (x) != PARALLEL)
22297 output_operand_lossage ("invalid %%N value");
22298 else
22299 fprintf (file, "%d", XVECLEN (x, 0) * 4);
22300 return;
22302 case 'O':
22303 /* Similar, but subtract 1 first. */
22304 if (GET_CODE (x) != PARALLEL)
22305 output_operand_lossage ("invalid %%O value");
22306 else
22307 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
22308 return;
22310 case 'p':
22311 /* X is a CONST_INT that is a power of two. Output the logarithm. */
22312 if (! INT_P (x)
22313 || INTVAL (x) < 0
22314 || (i = exact_log2 (INTVAL (x))) < 0)
22315 output_operand_lossage ("invalid %%p value");
22316 else
22317 fprintf (file, "%d", i);
22318 return;
22320 case 'P':
22321 /* The operand must be an indirect memory reference. The result
22322 is the register name. */
22323 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
22324 || REGNO (XEXP (x, 0)) >= 32)
22325 output_operand_lossage ("invalid %%P value");
22326 else
22327 fputs (reg_names[REGNO (XEXP (x, 0))], file);
22328 return;
22330 case 'q':
22331 /* This outputs the logical code corresponding to a boolean
22332 expression. The expression may have one or both operands
22333 negated (if one, only the first one). For condition register
22334 logical operations, it will also treat the negated
22335 CR codes as NOTs, but not handle NOTs of them. */
22337 const char *const *t = 0;
22338 const char *s;
22339 enum rtx_code code = GET_CODE (x);
22340 static const char * const tbl[3][3] = {
22341 { "and", "andc", "nor" },
22342 { "or", "orc", "nand" },
22343 { "xor", "eqv", "xor" } };
22345 if (code == AND)
22346 t = tbl[0];
22347 else if (code == IOR)
22348 t = tbl[1];
22349 else if (code == XOR)
22350 t = tbl[2];
22351 else
22352 output_operand_lossage ("invalid %%q value");
22354 if (GET_CODE (XEXP (x, 0)) != NOT)
22355 s = t[0];
22356 else
22358 if (GET_CODE (XEXP (x, 1)) == NOT)
22359 s = t[2];
22360 else
22361 s = t[1];
22364 fputs (s, file);
22366 return;
22368 case 'Q':
22369 if (! TARGET_MFCRF)
22370 return;
22371 fputc (',', file);
22372 /* FALLTHRU */
22374 case 'R':
22375 /* X is a CR register. Print the mask for `mtcrf'. */
22376 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22377 output_operand_lossage ("invalid %%R value");
22378 else
22379 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
22380 return;
22382 case 's':
22383 /* Low 5 bits of 32 - value */
22384 if (! INT_P (x))
22385 output_operand_lossage ("invalid %%s value");
22386 else
22387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
22388 return;
22390 case 't':
22391 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
22392 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
22394 /* Bit 3 is OV bit. */
22395 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
22397 /* If we want bit 31, write a shift count of zero, not 32. */
22398 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22399 return;
22401 case 'T':
22402 /* Print the symbolic name of a branch target register. */
22403 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
22404 && REGNO (x) != CTR_REGNO))
22405 output_operand_lossage ("invalid %%T value");
22406 else if (REGNO (x) == LR_REGNO)
22407 fputs ("lr", file);
22408 else
22409 fputs ("ctr", file);
22410 return;
22412 case 'u':
22413 /* High-order or low-order 16 bits of constant, whichever is non-zero,
22414 for use in unsigned operand. */
22415 if (! INT_P (x))
22417 output_operand_lossage ("invalid %%u value");
22418 return;
22421 uval = INTVAL (x);
22422 if ((uval & 0xffff) == 0)
22423 uval >>= 16;
22425 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
22426 return;
22428 case 'v':
22429 /* High-order 16 bits of constant for use in signed operand. */
22430 if (! INT_P (x))
22431 output_operand_lossage ("invalid %%v value");
22432 else
22433 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
22434 (INTVAL (x) >> 16) & 0xffff);
22435 return;
22437 case 'U':
22438 /* Print `u' if this has an auto-increment or auto-decrement. */
22439 if (MEM_P (x)
22440 && (GET_CODE (XEXP (x, 0)) == PRE_INC
22441 || GET_CODE (XEXP (x, 0)) == PRE_DEC
22442 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
22443 putc ('u', file);
22444 return;
22446 case 'V':
22447 /* Print the trap code for this operand. */
22448 switch (GET_CODE (x))
22450 case EQ:
22451 fputs ("eq", file); /* 4 */
22452 break;
22453 case NE:
22454 fputs ("ne", file); /* 24 */
22455 break;
22456 case LT:
22457 fputs ("lt", file); /* 16 */
22458 break;
22459 case LE:
22460 fputs ("le", file); /* 20 */
22461 break;
22462 case GT:
22463 fputs ("gt", file); /* 8 */
22464 break;
22465 case GE:
22466 fputs ("ge", file); /* 12 */
22467 break;
22468 case LTU:
22469 fputs ("llt", file); /* 2 */
22470 break;
22471 case LEU:
22472 fputs ("lle", file); /* 6 */
22473 break;
22474 case GTU:
22475 fputs ("lgt", file); /* 1 */
22476 break;
22477 case GEU:
22478 fputs ("lge", file); /* 5 */
22479 break;
22480 default:
22481 gcc_unreachable ();
22483 break;
22485 case 'w':
22486 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
22487 normally. */
22488 if (INT_P (x))
22489 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
22490 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
22491 else
22492 print_operand (file, x, 0);
22493 return;
22495 case 'x':
22496 /* X is a FPR or Altivec register used in a VSX context. */
22497 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
22498 output_operand_lossage ("invalid %%x value");
22499 else
22501 int reg = REGNO (x);
22502 int vsx_reg = (FP_REGNO_P (reg)
22503 ? reg - 32
22504 : reg - FIRST_ALTIVEC_REGNO + 32);
22506 #ifdef TARGET_REGNAMES
22507 if (TARGET_REGNAMES)
22508 fprintf (file, "%%vs%d", vsx_reg);
22509 else
22510 #endif
22511 fprintf (file, "%d", vsx_reg);
22513 return;
22515 case 'X':
22516 if (MEM_P (x)
22517 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
22518 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
22519 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
22520 putc ('x', file);
22521 return;
22523 case 'Y':
22524 /* Like 'L', for third word of TImode/PTImode */
22525 if (REG_P (x))
22526 fputs (reg_names[REGNO (x) + 2], file);
22527 else if (MEM_P (x))
22529 machine_mode mode = GET_MODE (x);
22530 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22531 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22532 output_address (mode, plus_constant (Pmode,
22533 XEXP (XEXP (x, 0), 0), 8));
22534 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22535 output_address (mode, plus_constant (Pmode,
22536 XEXP (XEXP (x, 0), 0), 8));
22537 else
22538 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
22539 if (small_data_operand (x, GET_MODE (x)))
22540 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22541 reg_names[SMALL_DATA_REG]);
22543 return;
22545 case 'z':
22546 /* X is a SYMBOL_REF. Write out the name preceded by a
22547 period and without any trailing data in brackets. Used for function
22548 names. If we are configured for System V (or the embedded ABI) on
22549 the PowerPC, do not emit the period, since those systems do not use
22550 TOCs and the like. */
22551 gcc_assert (GET_CODE (x) == SYMBOL_REF);
22553 /* For macho, check to see if we need a stub. */
22554 if (TARGET_MACHO)
22556 const char *name = XSTR (x, 0);
22557 #if TARGET_MACHO
22558 if (darwin_emit_branch_islands
22559 && MACHOPIC_INDIRECT
22560 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
22561 name = machopic_indirection_name (x, /*stub_p=*/true);
22562 #endif
22563 assemble_name (file, name);
22565 else if (!DOT_SYMBOLS)
22566 assemble_name (file, XSTR (x, 0));
22567 else
22568 rs6000_output_function_entry (file, XSTR (x, 0));
22569 return;
22571 case 'Z':
22572 /* Like 'L', for last word of TImode/PTImode. */
22573 if (REG_P (x))
22574 fputs (reg_names[REGNO (x) + 3], file);
22575 else if (MEM_P (x))
22577 machine_mode mode = GET_MODE (x);
22578 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22579 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22580 output_address (mode, plus_constant (Pmode,
22581 XEXP (XEXP (x, 0), 0), 12));
22582 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22583 output_address (mode, plus_constant (Pmode,
22584 XEXP (XEXP (x, 0), 0), 12));
22585 else
22586 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
22587 if (small_data_operand (x, GET_MODE (x)))
22588 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22589 reg_names[SMALL_DATA_REG]);
22591 return;
22593 /* Print AltiVec or SPE memory operand. */
22594 case 'y':
22596 rtx tmp;
22598 gcc_assert (MEM_P (x));
22600 tmp = XEXP (x, 0);
22602 /* Ugly hack because %y is overloaded. */
22603 if ((TARGET_SPE || TARGET_E500_DOUBLE)
22604 && (GET_MODE_SIZE (GET_MODE (x)) == 8
22605 || FLOAT128_2REG_P (GET_MODE (x))
22606 || GET_MODE (x) == TImode
22607 || GET_MODE (x) == PTImode))
22609 /* Handle [reg]. */
22610 if (REG_P (tmp))
22612 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
22613 break;
22615 /* Handle [reg+UIMM]. */
22616 else if (GET_CODE (tmp) == PLUS &&
22617 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
22619 int x;
22621 gcc_assert (REG_P (XEXP (tmp, 0)));
22623 x = INTVAL (XEXP (tmp, 1));
22624 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
22625 break;
22628 /* Fall through. Must be [reg+reg]. */
22630 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
22631 && GET_CODE (tmp) == AND
22632 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
22633 && INTVAL (XEXP (tmp, 1)) == -16)
22634 tmp = XEXP (tmp, 0);
22635 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
22636 && GET_CODE (tmp) == PRE_MODIFY)
22637 tmp = XEXP (tmp, 1);
22638 if (REG_P (tmp))
22639 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
22640 else
22642 if (GET_CODE (tmp) != PLUS
22643 || !REG_P (XEXP (tmp, 0))
22644 || !REG_P (XEXP (tmp, 1)))
22646 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
22647 break;
22650 if (REGNO (XEXP (tmp, 0)) == 0)
22651 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
22652 reg_names[ REGNO (XEXP (tmp, 0)) ]);
22653 else
22654 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
22655 reg_names[ REGNO (XEXP (tmp, 1)) ]);
22657 break;
22660 case 0:
22661 if (REG_P (x))
22662 fprintf (file, "%s", reg_names[REGNO (x)]);
22663 else if (MEM_P (x))
22665 /* We need to handle PRE_INC and PRE_DEC here, since we need to
22666 know the width from the mode. */
22667 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
22668 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
22669 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22670 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
22671 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
22672 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22673 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22674 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
22675 else
22676 output_address (GET_MODE (x), XEXP (x, 0));
22678 else
22680 if (toc_relative_expr_p (x, false))
22681 /* This hack along with a corresponding hack in
22682 rs6000_output_addr_const_extra arranges to output addends
22683 where the assembler expects to find them. E.g.
22684 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
22685 without this hack would be output as "x@toc+4". We
22686 want "x+4@toc". */
22687 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22688 else
22689 output_addr_const (file, x);
22691 return;
22693 case '&':
22694 if (const char *name = get_some_local_dynamic_name ())
22695 assemble_name (file, name);
22696 else
22697 output_operand_lossage ("'%%&' used without any "
22698 "local dynamic TLS references");
22699 return;
22701 default:
22702 output_operand_lossage ("invalid %%xn code");
22706 /* Print the address of an operand. */
22708 void
22709 print_operand_address (FILE *file, rtx x)
22711 if (REG_P (x))
22712 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
22713 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
22714 || GET_CODE (x) == LABEL_REF)
22716 output_addr_const (file, x);
22717 if (small_data_operand (x, GET_MODE (x)))
22718 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22719 reg_names[SMALL_DATA_REG]);
22720 else
22721 gcc_assert (!TARGET_TOC);
22723 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22724 && REG_P (XEXP (x, 1)))
22726 if (REGNO (XEXP (x, 0)) == 0)
22727 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
22728 reg_names[ REGNO (XEXP (x, 0)) ]);
22729 else
22730 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
22731 reg_names[ REGNO (XEXP (x, 1)) ]);
22733 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22734 && GET_CODE (XEXP (x, 1)) == CONST_INT)
22735 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
22736 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
22737 #if TARGET_MACHO
22738 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22739 && CONSTANT_P (XEXP (x, 1)))
22741 fprintf (file, "lo16(");
22742 output_addr_const (file, XEXP (x, 1));
22743 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22745 #endif
22746 #if TARGET_ELF
22747 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22748 && CONSTANT_P (XEXP (x, 1)))
22750 output_addr_const (file, XEXP (x, 1));
22751 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22753 #endif
22754 else if (toc_relative_expr_p (x, false))
22756 /* This hack along with a corresponding hack in
22757 rs6000_output_addr_const_extra arranges to output addends
22758 where the assembler expects to find them. E.g.
22759 (lo_sum (reg 9)
22760 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
22761 without this hack would be output as "x@toc+8@l(9)". We
22762 want "x+8@toc@l(9)". */
22763 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22764 if (GET_CODE (x) == LO_SUM)
22765 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
22766 else
22767 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
22769 else
22770 gcc_unreachable ();
22773 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
22775 static bool
22776 rs6000_output_addr_const_extra (FILE *file, rtx x)
22778 if (GET_CODE (x) == UNSPEC)
22779 switch (XINT (x, 1))
22781 case UNSPEC_TOCREL:
22782 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
22783 && REG_P (XVECEXP (x, 0, 1))
22784 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
22785 output_addr_const (file, XVECEXP (x, 0, 0));
22786 if (x == tocrel_base && tocrel_offset != const0_rtx)
22788 if (INTVAL (tocrel_offset) >= 0)
22789 fprintf (file, "+");
22790 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
22792 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
22794 putc ('-', file);
22795 assemble_name (file, toc_label_name);
22796 need_toc_init = 1;
22798 else if (TARGET_ELF)
22799 fputs ("@toc", file);
22800 return true;
22802 #if TARGET_MACHO
22803 case UNSPEC_MACHOPIC_OFFSET:
22804 output_addr_const (file, XVECEXP (x, 0, 0));
22805 putc ('-', file);
22806 machopic_output_function_base_name (file);
22807 return true;
22808 #endif
22810 return false;
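/* For example (illustrative): an UNSPEC_TOCREL reference to symbol "x"
   prints as "x@toc" on ELF; with a minimal TOC it instead prints as
   "x-" followed by toc_label_name, and need_toc_init is set so that the
   TOC base label actually gets emitted.  */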
22813 /* Target hook for assembling integer objects. The PowerPC version has
22814 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
22815 is defined. It also needs to handle DI-mode objects on 64-bit
22816 targets. */
22818 static bool
22819 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
22821 #ifdef RELOCATABLE_NEEDS_FIXUP
22822 /* Special handling for SI values. */
22823 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
22825 static int recurse = 0;
22827 /* For -mrelocatable, we mark all addresses that need to be fixed up in
22828 the .fixup section. Since the TOC section is already relocated, we
22829 don't need to mark it here. We used to skip the text section, but it
22830 should never be valid for relocated addresses to be placed in the text
22831 section. */
22832 if (DEFAULT_ABI == ABI_V4
22833 && (TARGET_RELOCATABLE || flag_pic > 1)
22834 && in_section != toc_section
22835 && !recurse
22836 && !CONST_SCALAR_INT_P (x)
22837 && CONSTANT_P (x))
22839 char buf[256];
22841 recurse = 1;
22842 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
22843 fixuplabelno++;
22844 ASM_OUTPUT_LABEL (asm_out_file, buf);
22845 fprintf (asm_out_file, "\t.long\t(");
22846 output_addr_const (asm_out_file, x);
22847 fprintf (asm_out_file, ")@fixup\n");
22848 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
22849 ASM_OUTPUT_ALIGN (asm_out_file, 2);
22850 fprintf (asm_out_file, "\t.long\t");
22851 assemble_name (asm_out_file, buf);
22852 fprintf (asm_out_file, "\n\t.previous\n");
22853 recurse = 0;
22854 return true;
22856 /* Remove initial .'s to turn a -mcall-aixdesc function
22857 address into the address of the descriptor, not the function
22858 itself. */
22859 else if (GET_CODE (x) == SYMBOL_REF
22860 && XSTR (x, 0)[0] == '.'
22861 && DEFAULT_ABI == ABI_AIX)
22863 const char *name = XSTR (x, 0);
22864 while (*name == '.')
22865 name++;
22867 fprintf (asm_out_file, "\t.long\t%s\n", name);
22868 return true;
22871 #endif /* RELOCATABLE_NEEDS_FIXUP */
22872 return default_assemble_integer (x, size, aligned_p);
22875 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22876 /* Emit an assembler directive to set symbol visibility for DECL to
22877 VISIBILITY_TYPE. */
22879 static void
22880 rs6000_assemble_visibility (tree decl, int vis)
22882 if (TARGET_XCOFF)
22883 return;
22885 /* Functions need to have their entry point symbol visibility set as
22886 well as their descriptor symbol visibility. */
22887 if (DEFAULT_ABI == ABI_AIX
22888 && DOT_SYMBOLS
22889 && TREE_CODE (decl) == FUNCTION_DECL)
22891 static const char * const visibility_types[] = {
22892 NULL, "protected", "hidden", "internal"
22895 const char *name, *type;
22897 name = ((* targetm.strip_name_encoding)
22898 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22899 type = visibility_types[vis];
22901 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22902 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22904 else
22905 default_assemble_visibility (decl, vis);
22907 #endif
22909 enum rtx_code
22910 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22912 /* Reversal of FP compares takes care -- an ordered compare
22913 becomes an unordered compare and vice versa. */
22914 if (mode == CCFPmode
22915 && (!flag_finite_math_only
22916 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22917 || code == UNEQ || code == LTGT))
22918 return reverse_condition_maybe_unordered (code);
22919 else
22920 return reverse_condition (code);
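/* Example (illustrative): reversing GE in CCFPmode yields UNLT rather
   than LT, so a branch on the reversed condition is still taken when
   the original compare came out unordered (a NaN operand).  */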
22923 /* Generate a compare for CODE. Return a brand-new rtx that
22924 represents the result of the compare. */
22926 static rtx
22927 rs6000_generate_compare (rtx cmp, machine_mode mode)
22929 machine_mode comp_mode;
22930 rtx compare_result;
22931 enum rtx_code code = GET_CODE (cmp);
22932 rtx op0 = XEXP (cmp, 0);
22933 rtx op1 = XEXP (cmp, 1);
22935 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22936 comp_mode = CCmode;
22937 else if (FLOAT_MODE_P (mode))
22938 comp_mode = CCFPmode;
22939 else if (code == GTU || code == LTU
22940 || code == GEU || code == LEU)
22941 comp_mode = CCUNSmode;
22942 else if ((code == EQ || code == NE)
22943 && unsigned_reg_p (op0)
22944 && (unsigned_reg_p (op1)
22945 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22946 /* These are unsigned values, perhaps there will be a later
22947 ordering compare that can be shared with this one. */
22948 comp_mode = CCUNSmode;
22949 else
22950 comp_mode = CCmode;
22952 /* If we have an unsigned compare, make sure we don't have a signed value as
22953 an immediate. */
22954 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22955 && INTVAL (op1) < 0)
22957 op0 = copy_rtx_if_shared (op0);
22958 op1 = force_reg (GET_MODE (op0), op1);
22959 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22962 /* First, the compare. */
22963 compare_result = gen_reg_rtx (comp_mode);
22965 /* E500 FP compare instructions on the GPRs. Yuck! */
22966 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
22967 && FLOAT_MODE_P (mode))
22969 rtx cmp, or_result, compare_result2;
22970 machine_mode op_mode = GET_MODE (op0);
22971 bool reverse_p;
22973 if (op_mode == VOIDmode)
22974 op_mode = GET_MODE (op1);
22976 /* First reverse the condition codes that aren't directly supported. */
22977 switch (code)
22979 case NE:
22980 case UNLT:
22981 case UNLE:
22982 case UNGT:
22983 case UNGE:
22984 code = reverse_condition_maybe_unordered (code);
22985 reverse_p = true;
22986 break;
22988 case EQ:
22989 case LT:
22990 case LE:
22991 case GT:
22992 case GE:
22993 reverse_p = false;
22994 break;
22996 default:
22997 gcc_unreachable ();
23000 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
23001 This explains the following mess. */
23003 switch (code)
23005 case EQ:
23006 switch (op_mode)
23008 case SFmode:
23009 cmp = (flag_finite_math_only && !flag_trapping_math)
23010 ? gen_tstsfeq_gpr (compare_result, op0, op1)
23011 : gen_cmpsfeq_gpr (compare_result, op0, op1);
23012 break;
23014 case DFmode:
23015 cmp = (flag_finite_math_only && !flag_trapping_math)
23016 ? gen_tstdfeq_gpr (compare_result, op0, op1)
23017 : gen_cmpdfeq_gpr (compare_result, op0, op1);
23018 break;
23020 case TFmode:
23021 case IFmode:
23022 case KFmode:
23023 cmp = (flag_finite_math_only && !flag_trapping_math)
23024 ? gen_tsttfeq_gpr (compare_result, op0, op1)
23025 : gen_cmptfeq_gpr (compare_result, op0, op1);
23026 break;
23028 default:
23029 gcc_unreachable ();
23031 break;
23033 case GT:
23034 case GE:
23035 switch (op_mode)
23037 case SFmode:
23038 cmp = (flag_finite_math_only && !flag_trapping_math)
23039 ? gen_tstsfgt_gpr (compare_result, op0, op1)
23040 : gen_cmpsfgt_gpr (compare_result, op0, op1);
23041 break;
23043 case DFmode:
23044 cmp = (flag_finite_math_only && !flag_trapping_math)
23045 ? gen_tstdfgt_gpr (compare_result, op0, op1)
23046 : gen_cmpdfgt_gpr (compare_result, op0, op1);
23047 break;
23049 case TFmode:
23050 case IFmode:
23051 case KFmode:
23052 cmp = (flag_finite_math_only && !flag_trapping_math)
23053 ? gen_tsttfgt_gpr (compare_result, op0, op1)
23054 : gen_cmptfgt_gpr (compare_result, op0, op1);
23055 break;
23057 default:
23058 gcc_unreachable ();
23060 break;
23062 case LT:
23063 case LE:
23064 switch (op_mode)
23066 case SFmode:
23067 cmp = (flag_finite_math_only && !flag_trapping_math)
23068 ? gen_tstsflt_gpr (compare_result, op0, op1)
23069 : gen_cmpsflt_gpr (compare_result, op0, op1);
23070 break;
23072 case DFmode:
23073 cmp = (flag_finite_math_only && !flag_trapping_math)
23074 ? gen_tstdflt_gpr (compare_result, op0, op1)
23075 : gen_cmpdflt_gpr (compare_result, op0, op1);
23076 break;
23078 case TFmode:
23079 case IFmode:
23080 case KFmode:
23081 cmp = (flag_finite_math_only && !flag_trapping_math)
23082 ? gen_tsttflt_gpr (compare_result, op0, op1)
23083 : gen_cmptflt_gpr (compare_result, op0, op1);
23084 break;
23086 default:
23087 gcc_unreachable ();
23089 break;
23091 default:
23092 gcc_unreachable ();
23095 /* Synthesize LE and GE from LT/GT || EQ. */
23096 if (code == LE || code == GE)
23098 emit_insn (cmp);
23100 compare_result2 = gen_reg_rtx (CCFPmode);
23102 /* Do the EQ. */
23103 switch (op_mode)
23105 case SFmode:
23106 cmp = (flag_finite_math_only && !flag_trapping_math)
23107 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
23108 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
23109 break;
23111 case DFmode:
23112 cmp = (flag_finite_math_only && !flag_trapping_math)
23113 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
23114 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
23115 break;
23117 case TFmode:
23118 case IFmode:
23119 case KFmode:
23120 cmp = (flag_finite_math_only && !flag_trapping_math)
23121 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
23122 : gen_cmptfeq_gpr (compare_result2, op0, op1);
23123 break;
23125 default:
23126 gcc_unreachable ();
23129 emit_insn (cmp);
23131 /* OR them together. */
23132 or_result = gen_reg_rtx (CCFPmode);
23133 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
23134 compare_result2);
23135 compare_result = or_result;
23138 code = reverse_p ? NE : EQ;
23140 emit_insn (cmp);
23143 /* Software emulation of IEEE 128-bit comparisons held in VSX registers
23144 when we do not have hardware support. */
23145 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
23147 rtx libfunc = NULL_RTX;
23148 bool check_nan = false;
23149 rtx dest;
23151 switch (code)
23153 case EQ:
23154 case NE:
23155 libfunc = optab_libfunc (eq_optab, mode);
23156 break;
23158 case GT:
23159 case GE:
23160 libfunc = optab_libfunc (ge_optab, mode);
23161 break;
23163 case LT:
23164 case LE:
23165 libfunc = optab_libfunc (le_optab, mode);
23166 break;
23168 case UNORDERED:
23169 case ORDERED:
23170 libfunc = optab_libfunc (unord_optab, mode);
23171 code = (code == UNORDERED) ? NE : EQ;
23172 break;
23174 case UNGE:
23175 case UNGT:
23176 check_nan = true;
23177 libfunc = optab_libfunc (ge_optab, mode);
23178 code = (code == UNGE) ? GE : GT;
23179 break;
23181 case UNLE:
23182 case UNLT:
23183 check_nan = true;
23184 libfunc = optab_libfunc (le_optab, mode);
23185 code = (code == UNLE) ? LE : LT;
23186 break;
23188 case UNEQ:
23189 case LTGT:
23190 check_nan = true;
23191 libfunc = optab_libfunc (eq_optab, mode);
23192 code = (code == UNEQ) ? EQ : NE;
23193 break;
23195 default:
23196 gcc_unreachable ();
23199 gcc_assert (libfunc);
23201 if (!check_nan)
23202 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23203 SImode, 2, op0, mode, op1, mode);
23205 /* The library signals an exception for signalling NaNs, so we need to
23206 handle isgreater, etc. by first checking isordered. */
23207 else
23209 rtx ne_rtx, normal_dest, unord_dest;
23210 rtx unord_func = optab_libfunc (unord_optab, mode);
23211 rtx join_label = gen_label_rtx ();
23212 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
23213 rtx unord_cmp = gen_reg_rtx (comp_mode);
23216 /* Test for either value being a NaN. */
23217 gcc_assert (unord_func);
23218 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
23219 SImode, 2, op0, mode, op1,
23220 mode);
23222 /* Set value (1) if either value is a NaN, and jump to the join
23223 label. */
23224 dest = gen_reg_rtx (SImode);
23225 emit_move_insn (dest, const1_rtx);
23226 emit_insn (gen_rtx_SET (unord_cmp,
23227 gen_rtx_COMPARE (comp_mode, unord_dest,
23228 const0_rtx)));
23230 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
23231 emit_jump_insn (gen_rtx_SET (pc_rtx,
23232 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
23233 join_ref,
23234 pc_rtx)));
23236 /* Do the normal comparison, knowing that the values are not
23237 NaNs. */
23238 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23239 SImode, 2, op0, mode, op1,
23240 mode);
23242 emit_insn (gen_cstoresi4 (dest,
23243 gen_rtx_fmt_ee (code, SImode, normal_dest,
23244 const0_rtx),
23245 normal_dest, const0_rtx));
23247 /* Join NaN and non-NaN paths. Compare dest against 0. */
23248 emit_label (join_label);
23249 code = NE;
23252 emit_insn (gen_rtx_SET (compare_result,
23253 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
23256 else
23258 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
23259 CLOBBERs to match cmptf_internal2 pattern. */
23260 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
23261 && FLOAT128_IBM_P (GET_MODE (op0))
23262 && TARGET_HARD_FLOAT && TARGET_FPRS)
23263 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23264 gen_rtvec (10,
23265 gen_rtx_SET (compare_result,
23266 gen_rtx_COMPARE (comp_mode, op0, op1)),
23267 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23268 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23269 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23270 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23271 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23272 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23273 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23274 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23275 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
23276 else if (GET_CODE (op1) == UNSPEC
23277 && XINT (op1, 1) == UNSPEC_SP_TEST)
23279 rtx op1b = XVECEXP (op1, 0, 0);
23280 comp_mode = CCEQmode;
23281 compare_result = gen_reg_rtx (CCEQmode);
23282 if (TARGET_64BIT)
23283 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
23284 else
23285 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
23287 else
23288 emit_insn (gen_rtx_SET (compare_result,
23289 gen_rtx_COMPARE (comp_mode, op0, op1)));
23292 /* Some kinds of FP comparisons need an OR operation;
23293 under flag_finite_math_only we don't bother. */
23294 if (FLOAT_MODE_P (mode)
23295 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
23296 && !flag_finite_math_only
23297 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
23298 && (code == LE || code == GE
23299 || code == UNEQ || code == LTGT
23300 || code == UNGT || code == UNLT))
23302 enum rtx_code or1, or2;
23303 rtx or1_rtx, or2_rtx, compare2_rtx;
23304 rtx or_result = gen_reg_rtx (CCEQmode);
23306 switch (code)
23308 case LE: or1 = LT; or2 = EQ; break;
23309 case GE: or1 = GT; or2 = EQ; break;
23310 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
23311 case LTGT: or1 = LT; or2 = GT; break;
23312 case UNGT: or1 = UNORDERED; or2 = GT; break;
23313 case UNLT: or1 = UNORDERED; or2 = LT; break;
23314 default: gcc_unreachable ();
23316 validate_condition_mode (or1, comp_mode);
23317 validate_condition_mode (or2, comp_mode);
23318 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
23319 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
23320 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
23321 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
23322 const_true_rtx);
23323 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
23325 compare_result = or_result;
23326 code = EQ;
23329 validate_condition_mode (code, GET_MODE (compare_result));
23331 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
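/* Illustrative result (sketch): for a DFmode LE comparison the code
   above emits a CCFPmode compare, then combines the LT and EQ bits
   (typically with a cror) into a CCEQmode register, and hands back
   (eq (reg:CCEQ) (const_int 0)) for the caller to branch on.  */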
23335 /* Return the diagnostic message string if the binary operation OP is
23336 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23338 static const char*
23339 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
23340 const_tree type1,
23341 const_tree type2)
23343 enum machine_mode mode1 = TYPE_MODE (type1);
23344 enum machine_mode mode2 = TYPE_MODE (type2);
23346 /* For complex modes, use the inner type. */
23347 if (COMPLEX_MODE_P (mode1))
23348 mode1 = GET_MODE_INNER (mode1);
23350 if (COMPLEX_MODE_P (mode2))
23351 mode2 = GET_MODE_INNER (mode2);
23353 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
23354 double to intermix unless -mfloat128-convert. */
23355 if (mode1 == mode2)
23356 return NULL;
23358 if (!TARGET_FLOAT128_CVT)
23360 if ((mode1 == KFmode && mode2 == IFmode)
23361 || (mode1 == IFmode && mode2 == KFmode))
23362 return N_("__float128 and __ibm128 cannot be used in the same "
23363 "expression");
23365 if (TARGET_IEEEQUAD
23366 && ((mode1 == IFmode && mode2 == TFmode)
23367 || (mode1 == TFmode && mode2 == IFmode)))
23368 return N_("__ibm128 and long double cannot be used in the same "
23369 "expression");
23371 if (!TARGET_IEEEQUAD
23372 && ((mode1 == KFmode && mode2 == TFmode)
23373 || (mode1 == TFmode && mode2 == KFmode)))
23374 return N_("__float128 and long double cannot be used in the same "
23375 "expression");
23378 return NULL;
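/* Illustrative source-level trigger (assuming -mfloat128 and no
   -mfloat128-convert):

     __float128 a;
     __ibm128 b;
     a + b;   /- error: __float128 and __ibm128 cannot be used in the
                 same expression -/

   An explicit cast on one operand makes the operation valid.  */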
23382 /* Expand floating point conversion to/from __float128 and __ibm128. */
23384 void
23385 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
23387 machine_mode dest_mode = GET_MODE (dest);
23388 machine_mode src_mode = GET_MODE (src);
23389 convert_optab cvt = unknown_optab;
23390 bool do_move = false;
23391 rtx libfunc = NULL_RTX;
23392 rtx dest2;
23393 typedef rtx (*rtx_2func_t) (rtx, rtx);
23394 rtx_2func_t hw_convert = (rtx_2func_t)0;
23395 size_t kf_or_tf;
23397 struct hw_conv_t {
23398 rtx_2func_t from_df;
23399 rtx_2func_t from_sf;
23400 rtx_2func_t from_si_sign;
23401 rtx_2func_t from_si_uns;
23402 rtx_2func_t from_di_sign;
23403 rtx_2func_t from_di_uns;
23404 rtx_2func_t to_df;
23405 rtx_2func_t to_sf;
23406 rtx_2func_t to_si_sign;
23407 rtx_2func_t to_si_uns;
23408 rtx_2func_t to_di_sign;
23409 rtx_2func_t to_di_uns;
23410 } hw_conversions[2] = {
23411 /* Conversions to/from KFmode. */
23413 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
23414 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
23415 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
23416 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
23417 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
23418 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
23419 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
23420 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
23421 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
23422 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
23423 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
23424 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
23427 /* Conversions to/from TFmode. */
23429 gen_extenddftf2_hw, /* TFmode <- DFmode. */
23430 gen_extendsftf2_hw, /* TFmode <- SFmode. */
23431 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
23432 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
23433 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
23434 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
23435 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
23436 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
23437 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
23438 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
23439 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
23440 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
23444 if (dest_mode == src_mode)
23445 gcc_unreachable ();
23447 /* Eliminate memory operations. */
23448 if (MEM_P (src))
23449 src = force_reg (src_mode, src);
23451 if (MEM_P (dest))
23453 rtx tmp = gen_reg_rtx (dest_mode);
23454 rs6000_expand_float128_convert (tmp, src, unsigned_p);
23455 rs6000_emit_move (dest, tmp, dest_mode);
23456 return;
23459 /* Convert to IEEE 128-bit floating point. */
23460 if (FLOAT128_IEEE_P (dest_mode))
23462 if (dest_mode == KFmode)
23463 kf_or_tf = 0;
23464 else if (dest_mode == TFmode)
23465 kf_or_tf = 1;
23466 else
23467 gcc_unreachable ();
23469 switch (src_mode)
23471 case DFmode:
23472 cvt = sext_optab;
23473 hw_convert = hw_conversions[kf_or_tf].from_df;
23474 break;
23476 case SFmode:
23477 cvt = sext_optab;
23478 hw_convert = hw_conversions[kf_or_tf].from_sf;
23479 break;
23481 case KFmode:
23482 case IFmode:
23483 case TFmode:
23484 if (FLOAT128_IBM_P (src_mode))
23485 cvt = sext_optab;
23486 else
23487 do_move = true;
23488 break;
23490 case SImode:
23491 if (unsigned_p)
23493 cvt = ufloat_optab;
23494 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
23496 else
23498 cvt = sfloat_optab;
23499 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
23501 break;
23503 case DImode:
23504 if (unsigned_p)
23506 cvt = ufloat_optab;
23507 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
23509 else
23511 cvt = sfloat_optab;
23512 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
23514 break;
23516 default:
23517 gcc_unreachable ();
23521 /* Convert from IEEE 128-bit floating point. */
23522 else if (FLOAT128_IEEE_P (src_mode))
23524 if (src_mode == KFmode)
23525 kf_or_tf = 0;
23526 else if (src_mode == TFmode)
23527 kf_or_tf = 1;
23528 else
23529 gcc_unreachable ();
23531 switch (dest_mode)
23533 case DFmode:
23534 cvt = trunc_optab;
23535 hw_convert = hw_conversions[kf_or_tf].to_df;
23536 break;
23538 case SFmode:
23539 cvt = trunc_optab;
23540 hw_convert = hw_conversions[kf_or_tf].to_sf;
23541 break;
23543 case KFmode:
23544 case IFmode:
23545 case TFmode:
23546 if (FLOAT128_IBM_P (dest_mode))
23547 cvt = trunc_optab;
23548 else
23549 do_move = true;
23550 break;
23552 case SImode:
23553 if (unsigned_p)
23555 cvt = ufix_optab;
23556 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
23558 else
23560 cvt = sfix_optab;
23561 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
23563 break;
23565 case DImode:
23566 if (unsigned_p)
23568 cvt = ufix_optab;
23569 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
23571 else
23573 cvt = sfix_optab;
23574 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
23576 break;
23578 default:
23579 gcc_unreachable ();
23583 /* Both IBM format. */
23584 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
23585 do_move = true;
23587 else
23588 gcc_unreachable ();
23590 /* Handle conversion between TFmode/KFmode. */
23591 if (do_move)
23592 emit_move_insn (dest, gen_lowpart (dest_mode, src));
23594 /* Handle conversion if we have hardware support. */
23595 else if (TARGET_FLOAT128_HW && hw_convert)
23596 emit_insn ((hw_convert) (dest, src));
23598 /* Call an external function to do the conversion. */
23599 else if (cvt != unknown_optab)
23601 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
23602 gcc_assert (libfunc != NULL_RTX);
23604 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
23605 src_mode);
23607 gcc_assert (dest2 != NULL_RTX);
23608 if (!rtx_equal_p (dest, dest2))
23609 emit_move_insn (dest, dest2);
23612 else
23613 gcc_unreachable ();
23615 return;
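/* A worked example of the routine above (a sketch, not a normative
   description): expanding a DFmode -> KFmode conversion
   (double -> __float128) selects cvt = sext_optab and
   hw_convert = gen_extenddfkf2_hw.  With TARGET_FLOAT128_HW the
   hardware pattern is emitted directly; otherwise the sext_optab
   libfunc registered for KFmode <- DFmode is called through
   emit_library_call_value (typically a software routine in libgcc).
   KFmode <-> IFmode conversions always take the libfunc path, since
   hw_convert is left unset for them.  */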
23618 /* Split a conversion from __float128 to an integer type into separate insns.
23619 OPERANDS points to the destination, source, and V2DI temporary
23620 register. CODE is either FIX or UNSIGNED_FIX. */
23622 void
23623 convert_float128_to_int (rtx *operands, enum rtx_code code)
23625 rtx dest = operands[0];
23626 rtx src = operands[1];
23627 rtx tmp = operands[2];
23628 rtx cvt;
23629 rtvec cvt_vec;
23630 rtx cvt_unspec;
23631 rtvec move_vec;
23632 rtx move_unspec;
23634 if (GET_CODE (tmp) == SCRATCH)
23635 tmp = gen_reg_rtx (V2DImode);
23637 if (MEM_P (dest))
23638 dest = rs6000_address_for_fpconvert (dest);
23640 /* Generate the actual convert insn of the form:
23641 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
23642 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
23643 cvt_vec = gen_rtvec (1, cvt);
23644 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23645 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
23647 /* Generate the move insn of the form:
23648 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
23649 move_vec = gen_rtvec (1, tmp);
23650 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
23651 emit_insn (gen_rtx_SET (dest, move_unspec));
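/* For instance, splitting (unsigned int) x with x in KFmode
   (code == UNSIGNED_FIX, SImode destination) yields the two insns

     (set (reg:V2DI tmp)
          (unspec:V2DI [(unsigned_fix:SI (reg:KF x))]
                       UNSPEC_IEEE128_CONVERT))
     (set (reg:SI dest)
          (unspec:SI [(reg:V2DI tmp)] UNSPEC_IEEE128_MOVE))

   where the register names are illustrative.  */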
23654 /* Split a conversion from an integer type to __float128 into separate insns.
23655 OPERANDS points to the destination, source, and V2DI temporary
23656 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
23658 void
23659 convert_int_to_float128 (rtx *operands, enum rtx_code code)
23661 rtx dest = operands[0];
23662 rtx src = operands[1];
23663 rtx tmp = operands[2];
23664 rtx cvt;
23665 rtvec cvt_vec;
23666 rtx cvt_unspec;
23667 rtvec move_vec;
23668 rtx move_unspec;
23669 rtx unsigned_flag;
23671 if (GET_CODE (tmp) == SCRATCH)
23672 tmp = gen_reg_rtx (V2DImode);
23674 if (MEM_P (src))
23675 src = rs6000_address_for_fpconvert (src);
23677 /* Generate the move of the integer into the Altivec register of the form:
23678 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
23679 (const_int 0)] UNSPEC_IEEE128_MOVE)).
23681 or:
23682 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
23684 if (GET_MODE (src) == SImode)
23686 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
23687 move_vec = gen_rtvec (2, src, unsigned_flag);
23689 else
23690 move_vec = gen_rtvec (1, src);
23692 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
23693 emit_insn (gen_rtx_SET (tmp, move_unspec));
23695 /* Generate the actual convert insn of the form:
23696 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
23697 UNSPEC_IEEE128_CONVERT))). */
23698 cvt_vec = gen_rtvec (1, tmp);
23699 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23700 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
23701 emit_insn (gen_rtx_SET (dest, cvt));
23705 /* Emit the RTL for an sISEL pattern. */
23707 void
23708 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
23710 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
23713 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
23714 can be used as that dest register. Return the dest register. */
23716 static rtx
23717 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
23719 if (op2 == const0_rtx)
23720 return op1;
23722 if (GET_CODE (scratch) == SCRATCH)
23723 scratch = gen_reg_rtx (mode);
23725 if (logical_operand (op2, mode))
23726 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
23727 else
23728 emit_insn (gen_rtx_SET (scratch,
23729 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
23731 return scratch;
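/* Example of the two paths above (constants illustrative): comparing
   OP1 against 100, which satisfies logical_operand, emits

     scratch = op1 ^ 100	-- zero iff op1 == 100

   while comparing against -100, which does not, emits

     scratch = op1 + 100	-- zero iff op1 == -100  */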
23734 void
23735 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
23737 rtx condition_rtx;
23738 machine_mode op_mode;
23739 enum rtx_code cond_code;
23740 rtx result = operands[0];
23742 condition_rtx = rs6000_generate_compare (operands[1], mode);
23743 cond_code = GET_CODE (condition_rtx);
23745 if (FLOAT_MODE_P (mode)
23746 && !TARGET_FPRS && TARGET_HARD_FLOAT)
23748 rtx t;
23750 PUT_MODE (condition_rtx, SImode);
23751 t = XEXP (condition_rtx, 0);
23753 gcc_assert (cond_code == NE || cond_code == EQ);
23755 if (cond_code == NE)
23756 emit_insn (gen_e500_flip_gt_bit (t, t));
23758 emit_insn (gen_move_from_CR_gt_bit (result, t));
23759 return;
23762 if (cond_code == NE
23763 || cond_code == GE || cond_code == LE
23764 || cond_code == GEU || cond_code == LEU
23765 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
23767 rtx not_result = gen_reg_rtx (CCEQmode);
23768 rtx not_op, rev_cond_rtx;
23769 machine_mode cc_mode;
23771 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
23773 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
23774 SImode, XEXP (condition_rtx, 0), const0_rtx);
23775 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
23776 emit_insn (gen_rtx_SET (not_result, not_op));
23777 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
23780 op_mode = GET_MODE (XEXP (operands[1], 0));
23781 if (op_mode == VOIDmode)
23782 op_mode = GET_MODE (XEXP (operands[1], 1));
23784 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
23786 PUT_MODE (condition_rtx, DImode);
23787 convert_move (result, condition_rtx, 0);
23789 else
23791 PUT_MODE (condition_rtx, SImode);
23792 emit_insn (gen_rtx_SET (result, condition_rtx));
23796 /* Emit a branch of kind CODE to location LOC. */
23798 void
23799 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
23801 rtx condition_rtx, loc_ref;
23803 condition_rtx = rs6000_generate_compare (operands[0], mode);
23804 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
23805 emit_jump_insn (gen_rtx_SET (pc_rtx,
23806 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
23807 loc_ref, pc_rtx)));
23810 /* Return the string to output a conditional branch to LABEL, which is
23811 the operand template of the label, or NULL if the branch is really a
23812 conditional return.
23814 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
23815 condition code register and its mode specifies what kind of
23816 comparison we made.
23818 REVERSED is nonzero if we should reverse the sense of the comparison.
23820 INSN is the insn. */
23822 char *
23823 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
23825 static char string[64];
23826 enum rtx_code code = GET_CODE (op);
23827 rtx cc_reg = XEXP (op, 0);
23828 machine_mode mode = GET_MODE (cc_reg);
23829 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
23830 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
23831 int really_reversed = reversed ^ need_longbranch;
23832 char *s = string;
23833 const char *ccode;
23834 const char *pred;
23835 rtx note;
23837 validate_condition_mode (code, mode);
23839 /* Work out which way this really branches. We could use
23840 reverse_condition_maybe_unordered here always but this
23841 makes the resulting assembler clearer. */
23842 if (really_reversed)
23844 /* Reversal of FP compares takes care -- an ordered compare
23845 becomes an unordered compare and vice versa. */
23846 if (mode == CCFPmode)
23847 code = reverse_condition_maybe_unordered (code);
23848 else
23849 code = reverse_condition (code);
23852 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
23854 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
23855 to the GT bit. */
23856 switch (code)
23858 case EQ:
23859 /* Opposite of GT. */
23860 code = GT;
23861 break;
23863 case NE:
23864 code = UNLE;
23865 break;
23867 default:
23868 gcc_unreachable ();
23872 switch (code)
23874 /* Not all of these are actually distinct opcodes, but
23875 we distinguish them for clarity of the resulting assembler. */
23876 case NE: case LTGT:
23877 ccode = "ne"; break;
23878 case EQ: case UNEQ:
23879 ccode = "eq"; break;
23880 case GE: case GEU:
23881 ccode = "ge"; break;
23882 case GT: case GTU: case UNGT:
23883 ccode = "gt"; break;
23884 case LE: case LEU:
23885 ccode = "le"; break;
23886 case LT: case LTU: case UNLT:
23887 ccode = "lt"; break;
23888 case UNORDERED: ccode = "un"; break;
23889 case ORDERED: ccode = "nu"; break;
23890 case UNGE: ccode = "nl"; break;
23891 case UNLE: ccode = "ng"; break;
23892 default:
23893 gcc_unreachable ();
23896 /* Maybe we have a guess as to how likely the branch is. */
23897 pred = "";
23898 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
23899 if (note != NULL_RTX)
23901 /* PROB is the difference from 50%. */
23902 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
23904 /* Only hint for highly probable/improbable branches on newer cpus when
23905 we have real profile data, as static prediction overrides processor
23906 dynamic prediction. For older cpus we may as well always hint, but
23907 assume not taken for branches that are very close to 50% as a
23908 mispredicted taken branch is more expensive than a
23909 mispredicted not-taken branch. */
23910 if (rs6000_always_hint
23911 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
23912 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
23913 && br_prob_note_reliable_p (note)))
23915 if (abs (prob) > REG_BR_PROB_BASE / 20
23916 && ((prob > 0) ^ need_longbranch))
23917 pred = "+";
23918 else
23919 pred = "-";
23923 if (label == NULL)
23924 s += sprintf (s, "b%slr%s ", ccode, pred);
23925 else
23926 s += sprintf (s, "b%s%s ", ccode, pred);
23928 /* We need to escape any '%' characters in the reg_names string.
23929 Assume they'd only be the first character.... */
23930 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
23931 *s++ = '%';
23932 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
23934 if (label != NULL)
23936 /* If the branch distance was too far, we may have to use an
23937 unconditional branch to go the distance. */
23938 if (need_longbranch)
23939 s += sprintf (s, ",$+8\n\tb %s", label);
23940 else
23941 s += sprintf (s, ",%s", label);
23944 return string;
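/* Sample strings produced above (CR fields and labels illustrative):

     "beq 0,.L5"		ordinary conditional branch
     "bne- 7,.L12"		hinted not-taken via a REG_BR_PROB note
     "beqlr 0"			conditional return (LABEL == NULL)
     "bne 0,$+8\n\tb .L99"	an EQ branch that is out of range:
				the reversed test skips an
				unconditional branch  */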
23947 /* Return the string to flip the GT bit on a CR. */
23948 char *
23949 output_e500_flip_gt_bit (rtx dst, rtx src)
23951 static char string[64];
23952 int a, b;
23954 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
23955 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
23957 /* GT bit. */
23958 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
23959 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
23961 sprintf (string, "crnot %d,%d", a, b);
23962 return string;
23965 /* Return insn for VSX or Altivec comparisons. */
23967 static rtx
23968 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
23970 rtx mask;
23971 machine_mode mode = GET_MODE (op0);
23973 switch (code)
23975 default:
23976 break;
23978 case GE:
23979 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23980 return NULL_RTX;
23981 /* FALLTHRU */
23983 case EQ:
23984 case GT:
23985 case GTU:
23986 case ORDERED:
23987 case UNORDERED:
23988 case UNEQ:
23989 case LTGT:
23990 mask = gen_reg_rtx (mode);
23991 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
23992 return mask;
23995 return NULL_RTX;
23998 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
23999 DMODE is expected destination mode. This is a recursive function. */
24001 static rtx
24002 rs6000_emit_vector_compare (enum rtx_code rcode,
24003 rtx op0, rtx op1,
24004 machine_mode dmode)
24006 rtx mask;
24007 bool swap_operands = false;
24008 bool try_again = false;
24010 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
24011 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
24013 /* See if the comparison works as is. */
24014 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24015 if (mask)
24016 return mask;
24018 switch (rcode)
24020 case LT:
24021 rcode = GT;
24022 swap_operands = true;
24023 try_again = true;
24024 break;
24025 case LTU:
24026 rcode = GTU;
24027 swap_operands = true;
24028 try_again = true;
24029 break;
24030 case NE:
24031 case UNLE:
24032 case UNLT:
24033 case UNGE:
24034 case UNGT:
24035 /* Invert condition and try again.
24036 e.g., A != B becomes ~(A==B). */
24038 enum rtx_code rev_code;
24039 enum insn_code nor_code;
24040 rtx mask2;
24042 rev_code = reverse_condition_maybe_unordered (rcode);
24043 if (rev_code == UNKNOWN)
24044 return NULL_RTX;
24046 nor_code = optab_handler (one_cmpl_optab, dmode);
24047 if (nor_code == CODE_FOR_nothing)
24048 return NULL_RTX;
24050 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
24051 if (!mask2)
24052 return NULL_RTX;
24054 mask = gen_reg_rtx (dmode);
24055 emit_insn (GEN_FCN (nor_code) (mask, mask2));
24056 return mask;
24058 break;
24059 case GE:
24060 case GEU:
24061 case LE:
24062 case LEU:
24063 /* Try GT/GTU/LT/LTU OR EQ */
24065 rtx c_rtx, eq_rtx;
24066 enum insn_code ior_code;
24067 enum rtx_code new_code;
24069 switch (rcode)
24071 case GE:
24072 new_code = GT;
24073 break;
24075 case GEU:
24076 new_code = GTU;
24077 break;
24079 case LE:
24080 new_code = LT;
24081 break;
24083 case LEU:
24084 new_code = LTU;
24085 break;
24087 default:
24088 gcc_unreachable ();
24091 ior_code = optab_handler (ior_optab, dmode);
24092 if (ior_code == CODE_FOR_nothing)
24093 return NULL_RTX;
24095 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
24096 if (!c_rtx)
24097 return NULL_RTX;
24099 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
24100 if (!eq_rtx)
24101 return NULL_RTX;
24103 mask = gen_reg_rtx (dmode);
24104 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
24105 return mask;
24107 break;
24108 default:
24109 return NULL_RTX;
24112 if (try_again)
24114 if (swap_operands)
24115 std::swap (op0, op1);
24117 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24118 if (mask)
24119 return mask;
24122 /* You only get two chances. */
24123 return NULL_RTX;
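/* Example of the recursion above: a V4SI LE comparison is built as
   (LT op0 op1) IOR (EQ op0 op1), and the LT leg itself retries as GT
   with the operands swapped, since AltiVec/VSX only provide EQ, GT
   and GTU directly; an NE comparison becomes one_cmpl of EQ.  */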
24126 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
24127 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
24128 operands for the relation operation COND. */
24130 int
24131 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
24132 rtx cond, rtx cc_op0, rtx cc_op1)
24134 machine_mode dest_mode = GET_MODE (dest);
24135 machine_mode mask_mode = GET_MODE (cc_op0);
24136 enum rtx_code rcode = GET_CODE (cond);
24137 machine_mode cc_mode = CCmode;
24138 rtx mask;
24139 rtx cond2;
24140 rtx tmp;
24141 bool invert_move = false;
24143 if (VECTOR_UNIT_NONE_P (dest_mode))
24144 return 0;
24146 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
24147 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
24149 switch (rcode)
24151 /* Swap operands if we can, and fall back to doing the operation as
24152 specified, and doing a NOR to invert the test. */
24153 case NE:
24154 case UNLE:
24155 case UNLT:
24156 case UNGE:
24157 case UNGT:
24158 /* Invert condition and try again.
24159 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
24160 invert_move = true;
24161 rcode = reverse_condition_maybe_unordered (rcode);
24162 if (rcode == UNKNOWN)
24163 return 0;
24164 break;
24166 /* Mark unsigned tests with CCUNSmode. */
24167 case GTU:
24168 case GEU:
24169 case LTU:
24170 case LEU:
24171 cc_mode = CCUNSmode;
24172 break;
24174 default:
24175 break;
24178 /* Get the vector mask for the given relational operations. */
24179 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
24181 if (!mask)
24182 return 0;
24184 if (invert_move)
24186 tmp = op_true;
24187 op_true = op_false;
24188 op_false = tmp;
24191 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
24192 CONST0_RTX (dest_mode));
24193 emit_insn (gen_rtx_SET (dest,
24194 gen_rtx_IF_THEN_ELSE (dest_mode,
24195 cond2,
24196 op_true,
24197 op_false)));
24198 return 1;
24201 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
24202 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
24203 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
24204 hardware has no such operation. */
24206 static int
24207 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24209 enum rtx_code code = GET_CODE (op);
24210 rtx op0 = XEXP (op, 0);
24211 rtx op1 = XEXP (op, 1);
24212 machine_mode compare_mode = GET_MODE (op0);
24213 machine_mode result_mode = GET_MODE (dest);
24214 bool max_p = false;
24216 if (result_mode != compare_mode)
24217 return 0;
24219 if (code == GE || code == GT)
24220 max_p = true;
24221 else if (code == LE || code == LT)
24222 max_p = false;
24223 else
24224 return 0;
24226 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
24227 ;
24229 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
24230 max_p = !max_p;
24232 else
24233 return 0;
24235 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
24236 return 1;
24239 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
24240 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
24241 operands of the last comparison is nonzero/true, FALSE_COND if it is
24242 zero/false. Return 0 if the hardware has no such operation. */
24244 static int
24245 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24247 enum rtx_code code = GET_CODE (op);
24248 rtx op0 = XEXP (op, 0);
24249 rtx op1 = XEXP (op, 1);
24250 machine_mode result_mode = GET_MODE (dest);
24251 rtx compare_rtx;
24252 rtx cmove_rtx;
24253 rtx clobber_rtx;
24255 if (!can_create_pseudo_p ())
24256 return 0;
24258 switch (code)
24260 case EQ:
24261 case GE:
24262 case GT:
24263 break;
24265 case NE:
24266 case LT:
24267 case LE:
24268 code = swap_condition (code);
24269 std::swap (op0, op1);
24270 break;
24272 default:
24273 return 0;
24276 /* Generate: [(parallel [(set (dest)
24277 (if_then_else (op (cmp1) (cmp2))
24278 (true)
24279 (false)))
24280 (clobber (scratch))])]. */
24282 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
24283 cmove_rtx = gen_rtx_SET (dest,
24284 gen_rtx_IF_THEN_ELSE (result_mode,
24285 compare_rtx,
24286 true_cond,
24287 false_cond));
24289 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
24290 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24291 gen_rtvec (2, cmove_rtx, clobber_rtx)));
24293 return 1;
24296 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
24297 operands of the last comparison is nonzero/true, FALSE_COND if it
24298 is zero/false. Return 0 if the hardware has no such operation. */
24300 int
24301 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24303 enum rtx_code code = GET_CODE (op);
24304 rtx op0 = XEXP (op, 0);
24305 rtx op1 = XEXP (op, 1);
24306 machine_mode compare_mode = GET_MODE (op0);
24307 machine_mode result_mode = GET_MODE (dest);
24308 rtx temp;
24309 bool is_against_zero;
24311 /* These modes should always match. */
24312 if (GET_MODE (op1) != compare_mode
24313 /* In the isel case however, we can use a compare immediate, so
24314 op1 may be a small constant. */
24315 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
24316 return 0;
24317 if (GET_MODE (true_cond) != result_mode)
24318 return 0;
24319 if (GET_MODE (false_cond) != result_mode)
24320 return 0;
24322 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
24323 if (TARGET_P9_MINMAX
24324 && (compare_mode == SFmode || compare_mode == DFmode)
24325 && (result_mode == SFmode || result_mode == DFmode))
24327 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
24328 return 1;
24330 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
24331 return 1;
24334 /* Don't allow using floating point comparisons for integer results for
24335 now. */
24336 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
24337 return 0;
24339 /* First, work out if the hardware can do this at all, or
24340 if it's too slow.... */
24341 if (!FLOAT_MODE_P (compare_mode))
24343 if (TARGET_ISEL)
24344 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
24345 return 0;
24347 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
24348 && SCALAR_FLOAT_MODE_P (compare_mode))
24349 return 0;
24351 is_against_zero = op1 == CONST0_RTX (compare_mode);
24353 /* A floating-point subtract might overflow, underflow, or produce
24354 an inexact result, thus changing the floating-point flags, so it
24355 can't be generated if we care about that. It's safe if one side
24356 of the construct is zero, since then no subtract will be
24357 generated. */
24358 if (SCALAR_FLOAT_MODE_P (compare_mode)
24359 && flag_trapping_math && ! is_against_zero)
24360 return 0;
24362 /* Eliminate half of the comparisons by switching operands, this
24363 makes the remaining code simpler. */
24364 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
24365 || code == LTGT || code == LT || code == UNLE)
24367 code = reverse_condition_maybe_unordered (code);
24368 temp = true_cond;
24369 true_cond = false_cond;
24370 false_cond = temp;
24373 /* UNEQ and LTGT take four instructions for a comparison with zero,
24374 so it'll probably be faster to use a branch here too. */
24375 if (code == UNEQ && HONOR_NANS (compare_mode))
24376 return 0;
24378 /* We're going to try to implement comparisons by performing
24379 a subtract, then comparing against zero. Unfortunately,
24380 Inf - Inf is NaN which is not zero, and so if we don't
24381 know that the operand is finite and the comparison
24382 would treat EQ differently from UNORDERED, we can't do it. */
24383 if (HONOR_INFINITIES (compare_mode)
24384 && code != GT && code != UNGE
24385 && (GET_CODE (op1) != CONST_DOUBLE
24386 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
24387 /* Constructs of the form (a OP b ? a : b) are safe. */
24388 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
24389 || (! rtx_equal_p (op0, true_cond)
24390 && ! rtx_equal_p (op1, true_cond))))
24391 return 0;
24393 /* At this point we know we can use fsel. */
24395 /* Reduce the comparison to a comparison against zero. */
24396 if (! is_against_zero)
24398 temp = gen_reg_rtx (compare_mode);
24399 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
24400 op0 = temp;
24401 op1 = CONST0_RTX (compare_mode);
24404 /* If we don't care about NaNs we can reduce some of the comparisons
24405 down to faster ones. */
24406 if (! HONOR_NANS (compare_mode))
24407 switch (code)
24409 case GT:
24410 code = LE;
24411 temp = true_cond;
24412 true_cond = false_cond;
24413 false_cond = temp;
24414 break;
24415 case UNGE:
24416 code = GE;
24417 break;
24418 case UNEQ:
24419 code = EQ;
24420 break;
24421 default:
24422 break;
24425 /* Now, reduce everything down to a GE. */
24426 switch (code)
24428 case GE:
24429 break;
24431 case LE:
24432 temp = gen_reg_rtx (compare_mode);
24433 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24434 op0 = temp;
24435 break;
24437 case ORDERED:
24438 temp = gen_reg_rtx (compare_mode);
24439 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
24440 op0 = temp;
24441 break;
24443 case EQ:
24444 temp = gen_reg_rtx (compare_mode);
24445 emit_insn (gen_rtx_SET (temp,
24446 gen_rtx_NEG (compare_mode,
24447 gen_rtx_ABS (compare_mode, op0))));
24448 op0 = temp;
24449 break;
24451 case UNGE:
24452 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
24453 temp = gen_reg_rtx (result_mode);
24454 emit_insn (gen_rtx_SET (temp,
24455 gen_rtx_IF_THEN_ELSE (result_mode,
24456 gen_rtx_GE (VOIDmode,
24457 op0, op1),
24458 true_cond, false_cond)));
24459 false_cond = true_cond;
24460 true_cond = temp;
24462 temp = gen_reg_rtx (compare_mode);
24463 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24464 op0 = temp;
24465 break;
24467 case GT:
24468 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
24469 temp = gen_reg_rtx (result_mode);
24470 emit_insn (gen_rtx_SET (temp,
24471 gen_rtx_IF_THEN_ELSE (result_mode,
24472 gen_rtx_GE (VOIDmode,
24473 op0, op1),
24474 true_cond, false_cond)));
24475 true_cond = false_cond;
24476 false_cond = temp;
24478 temp = gen_reg_rtx (compare_mode);
24479 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24480 op0 = temp;
24481 break;
24483 default:
24484 gcc_unreachable ();
24487 emit_insn (gen_rtx_SET (dest,
24488 gen_rtx_IF_THEN_ELSE (result_mode,
24489 gen_rtx_GE (VOIDmode,
24490 op0, op1),
24491 true_cond, false_cond)));
24492 return 1;
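/* A worked example of the fsel lowering above: for
   dest = (a == b) ? x : y the comparison is first reduced to
   (a - b) EQ 0, and EQ is then turned into GE by computing
   temp = -fabs (a - b), which is >= 0 exactly when a == b; a NaN
   result fails the GE test and selects the false arm, as EQ
   requires.  The final IF_THEN_ELSE on GE matches the fsel
   pattern.  */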
24495 /* Same as above, but for ints (isel). */
24497 static int
24498 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24500 rtx condition_rtx, cr;
24501 machine_mode mode = GET_MODE (dest);
24502 enum rtx_code cond_code;
24503 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
24504 bool signedp;
24506 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
24507 return 0;
24509 /* We still have to do the compare, because isel doesn't do a
24510 compare, it just looks at the CRx bits set by a previous compare
24511 instruction. */
24512 condition_rtx = rs6000_generate_compare (op, mode);
24513 cond_code = GET_CODE (condition_rtx);
24514 cr = XEXP (condition_rtx, 0);
24515 signedp = GET_MODE (cr) == CCmode;
24517 isel_func = (mode == SImode
24518 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
24519 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
24521 switch (cond_code)
24523 case LT: case GT: case LTU: case GTU: case EQ:
24524 /* isel handles these directly. */
24525 break;
24527 default:
24528 /* We need to swap the sense of the comparison. */
24530 std::swap (false_cond, true_cond);
24531 PUT_CODE (condition_rtx, reverse_condition (cond_code));
24533 break;
24536 false_cond = force_reg (mode, false_cond);
24537 if (true_cond != const0_rtx)
24538 true_cond = force_reg (mode, true_cond);
24540 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
24542 return 1;
24545 const char *
24546 output_isel (rtx *operands)
24548 enum rtx_code code;
24550 code = GET_CODE (operands[1]);
24552 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
24554 gcc_assert (GET_CODE (operands[2]) == REG
24555 && GET_CODE (operands[3]) == REG);
24556 PUT_CODE (operands[1], reverse_condition (code));
24557 return "isel %0,%3,%2,%j1";
24560 return "isel %0,%2,%3,%j1";
24563 void
24564 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
24566 machine_mode mode = GET_MODE (op0);
24567 enum rtx_code c;
24568 rtx target;
24570 /* VSX/altivec have direct min/max insns. */
24571 if ((code == SMAX || code == SMIN)
24572 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
24573 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
24575 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
24576 return;
24579 if (code == SMAX || code == SMIN)
24580 c = GE;
24581 else
24582 c = GEU;
24584 if (code == SMAX || code == UMAX)
24585 target = emit_conditional_move (dest, c, op0, op1, mode,
24586 op0, op1, mode, 0);
24587 else
24588 target = emit_conditional_move (dest, c, op0, op1, mode,
24589 op1, op0, mode, 0);
24590 gcc_assert (target);
24591 if (target != dest)
24592 emit_move_insn (dest, target);
24595 /* Split a signbit operation on 64-bit machines with direct move. Also allow
24596 for the value to come from memory or if it is already loaded into a GPR. */
24598 void
24599 rs6000_split_signbit (rtx dest, rtx src)
24601 machine_mode d_mode = GET_MODE (dest);
24602 machine_mode s_mode = GET_MODE (src);
24603 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
24604 rtx shift_reg = dest_di;
24606 gcc_assert (REG_P (dest));
24607 gcc_assert (REG_P (src) || MEM_P (src));
24608 gcc_assert (s_mode == KFmode || s_mode == TFmode);
24610 if (MEM_P (src))
24612 rtx mem = (WORDS_BIG_ENDIAN
24613 ? adjust_address (src, DImode, 0)
24614 : adjust_address (src, DImode, 8));
24615 emit_insn (gen_rtx_SET (dest_di, mem));
24618 else
24620 unsigned int r = REGNO (src);
24622 /* If this is a VSX register, generate the special mfvsrd instruction
24623 to get it in a GPR. Until we support SF and DF modes, that will
24624 always be true. */
24625 gcc_assert (VSX_REGNO_P (r));
24627 if (s_mode == KFmode)
24628 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
24629 else
24630 emit_insn (gen_signbittf2_dm2 (dest_di, src));
24633 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
24634 return;
24637 /* A subroutine of the atomic operation splitters. Jump to LABEL if
24638 COND is true. Mark the jump as unlikely to be taken. */
24640 static void
24641 emit_unlikely_jump (rtx cond, rtx label)
24643 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
24644 rtx x;
24646 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
24647 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
24648 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
24651 /* A subroutine of the atomic operation splitters. Emit a load-locked
24652 instruction in MODE. For QI/HImode, possibly use a pattern that includes
24653 the zero_extend operation. */
24655 static void
24656 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
24658 rtx (*fn) (rtx, rtx) = NULL;
24660 switch (mode)
24662 case QImode:
24663 fn = gen_load_lockedqi;
24664 break;
24665 case HImode:
24666 fn = gen_load_lockedhi;
24667 break;
24668 case SImode:
24669 if (GET_MODE (mem) == QImode)
24670 fn = gen_load_lockedqi_si;
24671 else if (GET_MODE (mem) == HImode)
24672 fn = gen_load_lockedhi_si;
24673 else
24674 fn = gen_load_lockedsi;
24675 break;
24676 case DImode:
24677 fn = gen_load_lockeddi;
24678 break;
24679 case TImode:
24680 fn = gen_load_lockedti;
24681 break;
24682 default:
24683 gcc_unreachable ();
24685 emit_insn (fn (reg, mem));
24688 /* A subroutine of the atomic operation splitters. Emit a store-conditional
24689 instruction in MODE. */
24691 static void
24692 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
24694 rtx (*fn) (rtx, rtx, rtx) = NULL;
24696 switch (mode)
24698 case QImode:
24699 fn = gen_store_conditionalqi;
24700 break;
24701 case HImode:
24702 fn = gen_store_conditionalhi;
24703 break;
24704 case SImode:
24705 fn = gen_store_conditionalsi;
24706 break;
24707 case DImode:
24708 fn = gen_store_conditionaldi;
24709 break;
24710 case TImode:
24711 fn = gen_store_conditionalti;
24712 break;
24713 default:
24714 gcc_unreachable ();
24717 /* Emit sync before stwcx. to address PPC405 Erratum. */
24718 if (PPC405_ERRATUM77)
24719 emit_insn (gen_hwsync ());
24721 emit_insn (fn (res, mem, val));
24724 /* Expand barriers before and after a load_locked/store_cond sequence. */
24726 static rtx
24727 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
24729 rtx addr = XEXP (mem, 0);
24730 int strict_p = (reload_in_progress || reload_completed);
24732 if (!legitimate_indirect_address_p (addr, strict_p)
24733 && !legitimate_indexed_address_p (addr, strict_p))
24735 addr = force_reg (Pmode, addr);
24736 mem = replace_equiv_address_nv (mem, addr);
24739 switch (model)
24741 case MEMMODEL_RELAXED:
24742 case MEMMODEL_CONSUME:
24743 case MEMMODEL_ACQUIRE:
24744 break;
24745 case MEMMODEL_RELEASE:
24746 case MEMMODEL_ACQ_REL:
24747 emit_insn (gen_lwsync ());
24748 break;
24749 case MEMMODEL_SEQ_CST:
24750 emit_insn (gen_hwsync ());
24751 break;
24752 default:
24753 gcc_unreachable ();
24755 return mem;
24758 static void
24759 rs6000_post_atomic_barrier (enum memmodel model)
24761 switch (model)
24763 case MEMMODEL_RELAXED:
24764 case MEMMODEL_CONSUME:
24765 case MEMMODEL_RELEASE:
24766 break;
24767 case MEMMODEL_ACQUIRE:
24768 case MEMMODEL_ACQ_REL:
24769 case MEMMODEL_SEQ_CST:
24770 emit_insn (gen_isync ());
24771 break;
24772 default:
24773 gcc_unreachable ();
24777 /* A subroutine of the various atomic expanders. For sub-word operations,
24778 we must adjust things to operate on SImode. Given the original MEM,
24779 return a new aligned memory. Also build and return the quantities by
24780 which to shift and mask. */
24782 static rtx
24783 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
24785 rtx addr, align, shift, mask, mem;
24786 HOST_WIDE_INT shift_mask;
24787 machine_mode mode = GET_MODE (orig_mem);
24789 /* For smaller modes, we have to implement this via SImode. */
24790 shift_mask = (mode == QImode ? 0x18 : 0x10);
24792 addr = XEXP (orig_mem, 0);
24793 addr = force_reg (GET_MODE (addr), addr);
24795 /* Aligned memory containing subword. Generate a new memory. We
24796 do not want any of the existing MEM_ATTR data, as we're now
24797 accessing memory outside the original object. */
24798 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
24799 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24800 mem = gen_rtx_MEM (SImode, align);
24801 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
24802 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
24803 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
24805 /* Shift amount for subword relative to aligned word. */
24806 shift = gen_reg_rtx (SImode);
24807 addr = gen_lowpart (SImode, addr);
24808 rtx tmp = gen_reg_rtx (SImode);
24809 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
24810 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
24811 if (BYTES_BIG_ENDIAN)
24812 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
24813 shift, 1, OPTAB_LIB_WIDEN);
24814 *pshift = shift;
24816 /* Mask for insertion. */
24817 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
24818 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
24819 *pmask = mask;
24821 return mem;
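/* Worked example for the helper above: a little-endian HImode access
   at address A is rewritten as

     mem   = A & -4		-- containing aligned SImode word
     shift = (A << 3) & 0x10	-- 0 or 16 bits
     mask  = 0xffff << shift

   and on big-endian the shift is additionally XORed with 0x10 (0x18
   for QImode) to count from the other end of the word.  */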
24824 /* A subroutine of the various atomic expanders. For sub-word operands,
24825 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
24827 static rtx
24828 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
24830 rtx x;
24832 x = gen_reg_rtx (SImode);
24833 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
24834 gen_rtx_NOT (SImode, mask),
24835 oldval)));
24837 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
24839 return x;
24842 /* A subroutine of the various atomic expanders. For sub-word operands,
24843 extract WIDE to NARROW via SHIFT. */
24845 static void
24846 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
24848 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
24849 wide, 1, OPTAB_LIB_WIDEN);
24850 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
24853 /* Expand an atomic compare and swap operation. */
24855 void
24856 rs6000_expand_atomic_compare_and_swap (rtx operands[])
24858 rtx boolval, retval, mem, oldval, newval, cond;
24859 rtx label1, label2, x, mask, shift;
24860 machine_mode mode, orig_mode;
24861 enum memmodel mod_s, mod_f;
24862 bool is_weak;
24864 boolval = operands[0];
24865 retval = operands[1];
24866 mem = operands[2];
24867 oldval = operands[3];
24868 newval = operands[4];
24869 is_weak = (INTVAL (operands[5]) != 0);
24870 mod_s = memmodel_base (INTVAL (operands[6]));
24871 mod_f = memmodel_base (INTVAL (operands[7]));
24872 orig_mode = mode = GET_MODE (mem);
24874 mask = shift = NULL_RTX;
24875 if (mode == QImode || mode == HImode)
24877 /* Before power8, we didn't have access to lbarx/lharx, so generate
24878 lwarx and shift/mask operations. With power8, we need to do the
24879 comparison in SImode, but the store is still done in QI/HImode. */
24880 oldval = convert_modes (SImode, mode, oldval, 1);
24882 if (!TARGET_SYNC_HI_QI)
24884 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24886 /* Shift and mask OLDVAL into position within the word. */
24887 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
24888 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24890 /* Shift and mask NEWVAL into position within the word. */
24891 newval = convert_modes (SImode, mode, newval, 1);
24892 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
24893 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24896 /* Prepare to adjust the return value. */
24897 retval = gen_reg_rtx (SImode);
24898 mode = SImode;
24900 else if (reg_overlap_mentioned_p (retval, oldval))
24901 oldval = copy_to_reg (oldval);
24903 if (mode != TImode && !reg_or_short_operand (oldval, mode))
24904 oldval = copy_to_mode_reg (mode, oldval);
24906 if (reg_overlap_mentioned_p (retval, newval))
24907 newval = copy_to_reg (newval);
24909 mem = rs6000_pre_atomic_barrier (mem, mod_s);
24911 label1 = NULL_RTX;
24912 if (!is_weak)
24914 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24915 emit_label (XEXP (label1, 0));
24917 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24919 emit_load_locked (mode, retval, mem);
24921 x = retval;
24922 if (mask)
24923 x = expand_simple_binop (SImode, AND, retval, mask,
24924 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24926 cond = gen_reg_rtx (CCmode);
24927 /* If we have TImode, synthesize a comparison. */
24928 if (mode != TImode)
24929 x = gen_rtx_COMPARE (CCmode, x, oldval);
24930 else
24932 rtx xor1_result = gen_reg_rtx (DImode);
24933 rtx xor2_result = gen_reg_rtx (DImode);
24934 rtx or_result = gen_reg_rtx (DImode);
24935 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
24936 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
24937 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
24938 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
24940 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
24941 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
24942 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
24943 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
24946 emit_insn (gen_rtx_SET (cond, x));
24948 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24949 emit_unlikely_jump (x, label2);
24951 x = newval;
24952 if (mask)
24953 x = rs6000_mask_atomic_subword (retval, newval, mask);
24955 emit_store_conditional (orig_mode, cond, mem, x);
24957 if (!is_weak)
24959 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24960 emit_unlikely_jump (x, label1);
24963 if (!is_mm_relaxed (mod_f))
24964 emit_label (XEXP (label2, 0));
24966 rs6000_post_atomic_barrier (mod_s);
24968 if (is_mm_relaxed (mod_f))
24969 emit_label (XEXP (label2, 0));
24971 if (shift)
24972 rs6000_finish_atomic_subword (operands[1], retval, shift);
24973 else if (mode != GET_MODE (operands[1]))
24974 convert_move (operands[1], retval, 1);
24976 /* In all cases, CR0 contains EQ on success, and NE on failure. */
24977 x = gen_rtx_EQ (SImode, cond, const0_rtx);
24978 emit_insn (gen_rtx_SET (boolval, x));
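/* The code emitted above for a strong SImode compare-and-swap with
   seq-cst ordering has roughly this shape (registers illustrative):

	hwsync
     .L1:
	lwarx   r9,0,r3		# load-locked the current value
	cmpw    cr0,r9,r4	# compare against expected OLDVAL
	bne-    cr0,.L2
	stwcx.  r5,0,r3		# store NEWVAL if still reserved
	bne-    cr0,.L1		# reservation lost: retry
     .L2:
	isync			# acquire fence; CR0 holds EQ on success  */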
24981 /* Expand an atomic exchange operation. */
24983 void
24984 rs6000_expand_atomic_exchange (rtx operands[])
24986 rtx retval, mem, val, cond;
24987 machine_mode mode;
24988 enum memmodel model;
24989 rtx label, x, mask, shift;
24991 retval = operands[0];
24992 mem = operands[1];
24993 val = operands[2];
24994 model = memmodel_base (INTVAL (operands[3]));
24995 mode = GET_MODE (mem);
24997 mask = shift = NULL_RTX;
24998 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
25000 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25002 /* Shift and mask VAL into position within the word. */
25003 val = convert_modes (SImode, mode, val, 1);
25004 val = expand_simple_binop (SImode, ASHIFT, val, shift,
25005 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25007 /* Prepare to adjust the return value. */
25008 retval = gen_reg_rtx (SImode);
25009 mode = SImode;
25012 mem = rs6000_pre_atomic_barrier (mem, model);
25014 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
25015 emit_label (XEXP (label, 0));
25017 emit_load_locked (mode, retval, mem);
25019 x = val;
25020 if (mask)
25021 x = rs6000_mask_atomic_subword (retval, val, mask);
25023 cond = gen_reg_rtx (CCmode);
25024 emit_store_conditional (mode, cond, mem, x);
25026 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25027 emit_unlikely_jump (x, label);
25029 rs6000_post_atomic_barrier (model);
25031 if (shift)
25032 rs6000_finish_atomic_subword (operands[0], retval, shift);
25035 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
25036 to perform. MEM is the memory on which to operate. VAL is the second
25037 operand of the binary operator. BEFORE and AFTER are optional locations to
25038 return the value of MEM either before or after the operation. MODEL_RTX
25039 is a CONST_INT containing the memory model to use. */
25041 void
25042 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
25043 rtx orig_before, rtx orig_after, rtx model_rtx)
25045 enum memmodel model = memmodel_base (INTVAL (model_rtx));
25046 machine_mode mode = GET_MODE (mem);
25047 machine_mode store_mode = mode;
25048 rtx label, x, cond, mask, shift;
25049 rtx before = orig_before, after = orig_after;
25051 mask = shift = NULL_RTX;
25052 /* On power8, we want to use SImode for the operation. On previous systems,
25053 use the operation in a subword and shift/mask to get the proper byte or
25054 halfword. */
25055 if (mode == QImode || mode == HImode)
25057 if (TARGET_SYNC_HI_QI)
25059 val = convert_modes (SImode, mode, val, 1);
25061 /* Prepare to adjust the return value. */
25062 before = gen_reg_rtx (SImode);
25063 if (after)
25064 after = gen_reg_rtx (SImode);
25065 mode = SImode;
25067 else
25069 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
25071 /* Shift and mask VAL into position within the word. */
25072 val = convert_modes (SImode, mode, val, 1);
25073 val = expand_simple_binop (SImode, ASHIFT, val, shift,
25074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25076 switch (code)
25078 case IOR:
25079 case XOR:
25080 /* We've already zero-extended VAL. That is sufficient to
25081 make certain that it does not affect other bits. */
25082 mask = NULL;
25083 break;
25085 case AND:
25086 /* If we make certain that all of the other bits in VAL are
25087 set, that will be sufficient to not affect other bits. */
25088 x = gen_rtx_NOT (SImode, mask);
25089 x = gen_rtx_IOR (SImode, x, val);
25090 emit_insn (gen_rtx_SET (val, x));
25091 mask = NULL;
25092 break;
25094 case NOT:
25095 case PLUS:
25096 case MINUS:
25097 /* These will all affect bits outside the field and need
25098 adjustment via MASK within the loop. */
25099 break;
25101 default:
25102 gcc_unreachable ();
25105 /* Prepare to adjust the return value. */
25106 before = gen_reg_rtx (SImode);
25107 if (after)
25108 after = gen_reg_rtx (SImode);
25109 store_mode = mode = SImode;
25113 mem = rs6000_pre_atomic_barrier (mem, model);
25115 label = gen_label_rtx ();
25116 emit_label (label);
25117 label = gen_rtx_LABEL_REF (VOIDmode, label);
25119 if (before == NULL_RTX)
25120 before = gen_reg_rtx (mode);
25122 emit_load_locked (mode, before, mem);
25124 if (code == NOT)
25126 x = expand_simple_binop (mode, AND, before, val,
25127 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25128 after = expand_simple_unop (mode, NOT, x, after, 1);
25130 else
25132 after = expand_simple_binop (mode, code, before, val,
25133 after, 1, OPTAB_LIB_WIDEN);
25136 x = after;
25137 if (mask)
25139 x = expand_simple_binop (SImode, AND, after, mask,
25140 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25141 x = rs6000_mask_atomic_subword (before, x, mask);
25143 else if (store_mode != mode)
25144 x = convert_modes (store_mode, mode, x, 1);
25146 cond = gen_reg_rtx (CCmode);
25147 emit_store_conditional (store_mode, cond, mem, x);
25149 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25150 emit_unlikely_jump (x, label);
25152 rs6000_post_atomic_barrier (model);
25154 if (shift)
25156 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
25157 then do the calculations in a SImode register. */
25158 if (orig_before)
25159 rs6000_finish_atomic_subword (orig_before, before, shift);
25160 if (orig_after)
25161 rs6000_finish_atomic_subword (orig_after, after, shift);
25163 else if (store_mode != mode)
25165 /* QImode/HImode on machines with lbarx/lharx where we do the native
25166 operation and then do the calculations in a SImode register. */
25167 if (orig_before)
25168 convert_move (orig_before, before, 1);
25169 if (orig_after)
25170 convert_move (orig_after, after, 1);
25172 else if (orig_after && after != orig_after)
25173 emit_move_insn (orig_after, after);
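/* For example, an SImode atomic fetch-and-add with relaxed ordering
   expanded by the routine above boils down to (registers
   illustrative):

     .L1:
	lwarx   r9,0,r3		# BEFORE value
	add     r10,r9,r4	# AFTER = BEFORE + VAL
	stwcx.  r10,0,r3
	bne-    cr0,.L1		# retry if the reservation was lost  */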
25176 /* Emit instructions to move SRC to DST. Called by splitters for
25177 multi-register moves. It will emit at most one instruction for
25178 each register that is accessed; that is, it won't emit li/lis pairs
25179 (or equivalent for 64-bit code). One of SRC or DST must be a hard
25180 register. */
25182 void
25183 rs6000_split_multireg_move (rtx dst, rtx src)
25185 /* The register number of the first register being moved. */
25186 int reg;
25187 /* The mode that is to be moved. */
25188 machine_mode mode;
25189 /* The mode that the move is being done in, and its size. */
25190 machine_mode reg_mode;
25191 int reg_mode_size;
25192 /* The number of registers that will be moved. */
25193 int nregs;
25195 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
25196 mode = GET_MODE (dst);
25197 nregs = hard_regno_nregs[reg][mode];
25198 if (FP_REGNO_P (reg))
25199 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
25200 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
25201 else if (ALTIVEC_REGNO_P (reg))
25202 reg_mode = V16QImode;
25203 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
25204 reg_mode = DFmode;
25205 else
25206 reg_mode = word_mode;
25207 reg_mode_size = GET_MODE_SIZE (reg_mode);
25209 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
25211 /* TDmode residing in FP registers is special, since the ISA requires that
25212 the lower-numbered word of a register pair is always the most significant
25213 word, even in little-endian mode. This does not match the usual subreg
25214 semantics, so we cannot use simplify_gen_subreg in those cases. Access
25215 the appropriate constituent registers "by hand" in little-endian mode.
25217 Note we do not need to check for destructive overlap here since TDmode
25218 can only reside in even/odd register pairs. */
25219 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
25221 rtx p_src, p_dst;
25222 int i;
25224 for (i = 0; i < nregs; i++)
25226 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
25227 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
25228 else
25229 p_src = simplify_gen_subreg (reg_mode, src, mode,
25230 i * reg_mode_size);
25232 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
25233 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
25234 else
25235 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
25236 i * reg_mode_size);
25238 emit_insn (gen_rtx_SET (p_dst, p_src));
25241 return;
25244 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
25246 /* Move register range backwards, if we might have destructive
25247 overlap. */
25248 int i;
25249 for (i = nregs - 1; i >= 0; i--)
25250 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25251 i * reg_mode_size),
25252 simplify_gen_subreg (reg_mode, src, mode,
25253 i * reg_mode_size)));
25255 else
25257 int i;
25258 int j = -1;
25259 bool used_update = false;
25260 rtx restore_basereg = NULL_RTX;
25262 if (MEM_P (src) && INT_REGNO_P (reg))
25264 rtx breg;
25266 if (GET_CODE (XEXP (src, 0)) == PRE_INC
25267 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
25269 rtx delta_rtx;
25270 breg = XEXP (XEXP (src, 0), 0);
25271 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
25272 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
25273 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
25274 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25275 src = replace_equiv_address (src, breg);
25277 else if (! rs6000_offsettable_memref_p (src, reg_mode))
25279 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
25281 rtx basereg = XEXP (XEXP (src, 0), 0);
25282 if (TARGET_UPDATE)
25284 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
25285 emit_insn (gen_rtx_SET (ndst,
25286 gen_rtx_MEM (reg_mode,
25287 XEXP (src, 0))));
25288 used_update = true;
25290 else
25291 emit_insn (gen_rtx_SET (basereg,
25292 XEXP (XEXP (src, 0), 1)));
25293 src = replace_equiv_address (src, basereg);
25295 else
25297 rtx basereg = gen_rtx_REG (Pmode, reg);
25298 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
25299 src = replace_equiv_address (src, basereg);
25303 breg = XEXP (src, 0);
25304 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
25305 breg = XEXP (breg, 0);
25307 /* If the base register we are using to address memory is
25308 also a destination reg, then change that register last. */
25309 if (REG_P (breg)
25310 && REGNO (breg) >= REGNO (dst)
25311 && REGNO (breg) < REGNO (dst) + nregs)
25312 j = REGNO (breg) - REGNO (dst);
25314 else if (MEM_P (dst) && INT_REGNO_P (reg))
25316 rtx breg;
25318 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
25319 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
25321 rtx delta_rtx;
25322 breg = XEXP (XEXP (dst, 0), 0);
25323 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
25324 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
25325 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
25327 /* We have to update the breg before doing the store.
25328 Use store with update, if available. */
25330 if (TARGET_UPDATE)
25332 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25333 emit_insn (TARGET_32BIT
25334 ? (TARGET_POWERPC64
25335 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
25336 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
25337 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
25338 used_update = true;
25340 else
25341 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25342 dst = replace_equiv_address (dst, breg);
25344 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
25345 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
25347 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
25349 rtx basereg = XEXP (XEXP (dst, 0), 0);
25350 if (TARGET_UPDATE)
25352 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25353 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
25354 XEXP (dst, 0)),
25355 nsrc));
25356 used_update = true;
25358 else
25359 emit_insn (gen_rtx_SET (basereg,
25360 XEXP (XEXP (dst, 0), 1)));
25361 dst = replace_equiv_address (dst, basereg);
25363 else
25365 rtx basereg = XEXP (XEXP (dst, 0), 0);
25366 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
25367 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
25368 && REG_P (basereg)
25369 && REG_P (offsetreg)
25370 && REGNO (basereg) != REGNO (offsetreg));
25371 if (REGNO (basereg) == 0)
25373 rtx tmp = offsetreg;
25374 offsetreg = basereg;
25375 basereg = tmp;
25377 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
25378 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
25379 dst = replace_equiv_address (dst, basereg);
25382 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
25383 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
25386 for (i = 0; i < nregs; i++)
25388 /* Calculate index to next subword. */
25389 ++j;
25390 if (j == nregs)
25391 j = 0;
25393 /* If compiler already emitted move of first word by
25394 store with update, no need to do anything. */
25395 if (j == 0 && used_update)
25396 continue;
25398 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25399 j * reg_mode_size),
25400 simplify_gen_subreg (reg_mode, src, mode,
25401 j * reg_mode_size)));
25403 if (restore_basereg != NULL_RTX)
25404 emit_insn (restore_basereg);
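/* As a small example of the overlap handling above: splitting a
   64-bit TImode copy from r4:r5 into r5:r6 (REGNO (src) < REGNO (dst))
   must move the high-numbered register first, so the loop runs
   backwards and emits r6 = r5 before r5 = r4; a forward order would
   clobber r5 before it is read.  */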
25409 /* This page contains routines that are used to determine what the
25410 function prologue and epilogue code will do and write them out. */
25412 static inline bool
25413 save_reg_p (int r)
25415 return !call_used_regs[r] && df_regs_ever_live_p (r);
25418 /* Determine whether the gp REG is really used. */
25420 static bool
25421 rs6000_reg_live_or_pic_offset_p (int reg)
25423 /* We need to mark the PIC offset register live under the same conditions
25424 as it is set up, or it won't be saved before we clobber it. */
25426 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
25428 if (TARGET_TOC && TARGET_MINIMAL_TOC
25429 && (crtl->calls_eh_return
25430 || df_regs_ever_live_p (reg)
25431 || get_pool_size ()))
25432 return true;
25434 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
25435 && flag_pic)
25436 return true;
25439 /* If the function calls eh_return, claim used all the registers that would
25440 be checked for liveness otherwise. */
25442 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
25443 && !call_used_regs[reg]);
25446 /* Return the first fixed-point register that is required to be
25447 saved. 32 if none. */
25449 int
25450 first_reg_to_save (void)
25452 int first_reg;
25454 /* Find lowest numbered live register. */
25455 for (first_reg = 13; first_reg <= 31; first_reg++)
25456 if (save_reg_p (first_reg))
25457 break;
25459 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
25460 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
25461 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25462 || (TARGET_TOC && TARGET_MINIMAL_TOC))
25463 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
25464 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
25466 #if TARGET_MACHO
25467 if (flag_pic
25468 && crtl->uses_pic_offset_table
25469 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
25470 return RS6000_PIC_OFFSET_TABLE_REGNUM;
25471 #endif
25473 return first_reg;
25476 /* Similar, for FP regs. */
25478 static int
25479 first_fp_reg_to_save (void)
25481 int first_reg;
25483 /* Find lowest numbered live register. */
25484 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
25485 if (save_reg_p (first_reg))
25486 break;
25488 return first_reg;
25491 /* Similar, for AltiVec regs. */
25493 static int
25494 first_altivec_reg_to_save (void)
25496 int i;
25498 /* The stack frame remains as is unless we are in the AltiVec ABI. */
25499 if (! TARGET_ALTIVEC_ABI)
25500 return LAST_ALTIVEC_REGNO + 1;
25502 /* On Darwin, the unwind routines are compiled without
25503 TARGET_ALTIVEC, and use save_world to save/restore the
25504 altivec registers when necessary. */
25505 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25506 && ! TARGET_ALTIVEC)
25507 return FIRST_ALTIVEC_REGNO + 20;
25509 /* Find lowest numbered live register. */
25510 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
25511 if (save_reg_p (i))
25512 break;
25514 return i;
25517 /* Return a 32-bit mask of the AltiVec registers we need to set in
25518 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
25519 the 32-bit word is 0. */
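/* For example, if V20 is the only vector register left in the mask,
   only the bit for V20 counted from the MSB is set, so the returned
   mask is 0x00000800 (1 << (31 - 20)).  */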
25521 static unsigned int
25522 compute_vrsave_mask (void)
25524 unsigned int i, mask = 0;
25526 /* On Darwin, the unwind routines are compiled without
25527 TARGET_ALTIVEC, and use save_world to save/restore the
25528 call-saved altivec registers when necessary. */
25529 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25530 && ! TARGET_ALTIVEC)
25531 mask |= 0xFFF;
25533 /* First, find out if we use _any_ altivec registers. */
25534 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25535 if (df_regs_ever_live_p (i))
25536 mask |= ALTIVEC_REG_BIT (i);
25538 if (mask == 0)
25539 return mask;
25541 /* Next, remove the argument registers from the set. These must
25542 be in the VRSAVE mask set by the caller, so we don't need to add
25543 them in again. More importantly, the mask we compute here is
25544 used to generate CLOBBERs in the set_vrsave insn, and we do not
25545 wish the argument registers to die. */
25546 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
25547 mask &= ~ALTIVEC_REG_BIT (i);
25549 /* Similarly, remove the return value from the set. */
25551 bool yes = false;
25552 diddle_return_value (is_altivec_return_reg, &yes);
25553 if (yes)
25554 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
25557 return mask;
25560 /* For a very restricted set of circumstances, we can cut down the
25561 size of prologues/epilogues by calling our own save/restore-the-world
25562 routines. */
25564 static void
25565 compute_save_world_info (rs6000_stack_t *info)
25567 info->world_save_p = 1;
25568 info->world_save_p
25569 = (WORLD_SAVE_P (info)
25570 && DEFAULT_ABI == ABI_DARWIN
25571 && !cfun->has_nonlocal_label
25572 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
25573 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
25574 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
25575 && info->cr_save_p);
25577 /* This will not work in conjunction with sibcalls. Make sure there
25578 are none. (This check is expensive, but seldom executed.) */
25579 if (WORLD_SAVE_P (info))
25581 rtx_insn *insn;
25582 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
25583 if (CALL_P (insn) && SIBLING_CALL_P (insn))
25585 info->world_save_p = 0;
25586 break;
25590 if (WORLD_SAVE_P (info))
25592 /* Even if we're not touching VRsave, make sure there's room on the
25593 stack for it, if it looks like we're calling SAVE_WORLD, which
25594 will attempt to save it. */
25595 info->vrsave_size = 4;
25597 /* If we are going to save the world, we need to save the link register too. */
25598 info->lr_save_p = 1;
25600 /* "Save" the VRsave register too if we're saving the world. */
25601 if (info->vrsave_mask == 0)
25602 info->vrsave_mask = compute_vrsave_mask ();
25604 /* Because the Darwin register save/restore routines only handle
25605 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
25606 check. */
25607 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
25608 && (info->first_altivec_reg_save
25609 >= FIRST_SAVED_ALTIVEC_REGNO));
25612 return;
25616 static void
25617 is_altivec_return_reg (rtx reg, void *xyes)
25619 bool *yes = (bool *) xyes;
25620 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
25621 *yes = true;
25625 /* Return whether REG is a global user reg or has been specified by
25626 -ffixed-REG. We should not restore these, and so cannot use
25627 lmw or out-of-line restore functions if there are any. We also
25628 can't save them (well, emit frame notes for them), because frame
25629 unwinding during exception handling will restore saved registers. */
25631 static bool
25632 fixed_reg_p (int reg)
25634 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
25635 backend sets it, overriding anything the user might have given. */
25636 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
25637 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
25638 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25639 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
25640 return false;
25642 return fixed_regs[reg];
25645 /* Determine the strategy for savings/restoring registers. */
25647 enum {
25648 SAVE_MULTIPLE = 0x1,
25649 SAVE_INLINE_GPRS = 0x2,
25650 SAVE_INLINE_FPRS = 0x4,
25651 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
25652 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
25653 SAVE_INLINE_VRS = 0x20,
25654 REST_MULTIPLE = 0x100,
25655 REST_INLINE_GPRS = 0x200,
25656 REST_INLINE_FPRS = 0x400,
25657 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
25658 REST_INLINE_VRS = 0x1000
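/* For example, when not optimizing for size on a V4/ELF target the
   strategy selection below sets SAVE_INLINE_GPRS | REST_INLINE_GPRS
   (and the FPR and VR equivalents), i.e. everything is saved and
   restored inline; the out-of-line routines are only considered
   under -Os.  */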
25661 static int
25662 rs6000_savres_strategy (rs6000_stack_t *info,
25663 bool using_static_chain_p)
25665 int strategy = 0;
25667 /* Select between in-line and out-of-line save and restore of regs.
25668 First, all the obvious cases where we don't use out-of-line. */
25669 if (crtl->calls_eh_return
25670 || cfun->machine->ra_need_lr)
25671 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
25672 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
25673 | SAVE_INLINE_VRS | REST_INLINE_VRS);
25675 if (info->first_gp_reg_save == 32)
25676 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25678 if (info->first_fp_reg_save == 64
25679 /* The out-of-line FP routines use double-precision stores;
25680 we can't use those routines if we don't have such stores. */
25681 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
25682 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25684 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
25685 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25687 /* Define cutoff for using out-of-line functions to save registers. */
25688 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
25690 if (!optimize_size)
25692 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25693 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25694 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25696 else
25698 /* Prefer out-of-line restore if it will exit. */
25699 if (info->first_fp_reg_save > 61)
25700 strategy |= SAVE_INLINE_FPRS;
25701 if (info->first_gp_reg_save > 29)
25703 if (info->first_fp_reg_save == 64)
25704 strategy |= SAVE_INLINE_GPRS;
25705 else
25706 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25708 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
25709 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25712 else if (DEFAULT_ABI == ABI_DARWIN)
25714 if (info->first_fp_reg_save > 60)
25715 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25716 if (info->first_gp_reg_save > 29)
25717 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25718 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25720 else
25722 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25723 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
25724 || info->first_fp_reg_save > 61)
25725 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25726 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25727 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25730 /* Don't bother to try to save things out-of-line if r11 is occupied
25731 by the static chain. It would require too much fiddling and the
25732 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
25733 pointer on Darwin, and AIX uses r1 or r12. */
25734 if (using_static_chain_p
25735 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25736 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
25737 | SAVE_INLINE_GPRS
25738 | SAVE_INLINE_VRS);
25740 /* Saving CR interferes with the exit routines used on the SPE, so
25741 just punt here. */
25742 if (TARGET_SPE_ABI
25743 && info->spe_64bit_regs_used
25744 && info->cr_save_p)
25745 strategy |= REST_INLINE_GPRS;
25747 /* We can only use the out-of-line routines to restore fprs if we've
25748 saved all the registers from first_fp_reg_save in the prologue.
25749 Otherwise, we risk loading garbage. Of course, if we have saved
25750 out-of-line then we know we haven't skipped any fprs. */
25751 if ((strategy & SAVE_INLINE_FPRS)
25752 && !(strategy & REST_INLINE_FPRS))
25754 int i;
25756 for (i = info->first_fp_reg_save; i < 64; i++)
25757 if (fixed_regs[i] || !save_reg_p (i))
25759 strategy |= REST_INLINE_FPRS;
25760 break;
25764 /* Similarly, for altivec regs. */
25765 if ((strategy & SAVE_INLINE_VRS)
25766 && !(strategy & REST_INLINE_VRS))
25768 int i;
25770 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
25771 if (fixed_regs[i] || !save_reg_p (i))
25773 strategy |= REST_INLINE_VRS;
25774 break;
25778 /* info->lr_save_p isn't yet set if the only reason lr needs to be
25779 saved is an out-of-line save or restore. Set up the value for
25780 the next test (excluding out-of-line gprs). */
25781 bool lr_save_p = (info->lr_save_p
25782 || !(strategy & SAVE_INLINE_FPRS)
25783 || !(strategy & SAVE_INLINE_VRS)
25784 || !(strategy & REST_INLINE_FPRS)
25785 || !(strategy & REST_INLINE_VRS));
25787 if (TARGET_MULTIPLE
25788 && !TARGET_POWERPC64
25789 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
25790 && info->first_gp_reg_save < 31
25791 && !(flag_shrink_wrap
25792 && flag_shrink_wrap_separate
25793 && optimize_function_for_speed_p (cfun)))
25795 /* Prefer store multiple for saves over out-of-line routines,
25796 since the store-multiple instruction will always be smaller. */
25797 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
25799 /* The situation is more complicated with load multiple. We'd
25800 prefer to use the out-of-line routines for restores, since the
25801 "exit" out-of-line routines can handle the restore of LR and the
25802 frame teardown. However, it doesn't make sense to use the
25803 out-of-line routine if that is the only reason we'd need to save
25804 LR, and we can't use the "exit" out-of-line gpr restore if we
25805 have saved some fprs; in those cases it is advantageous to use
25806 load multiple when available. */
25807 if (info->first_fp_reg_save != 64 || !lr_save_p)
25808 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
25811 /* Using the "exit" out-of-line routine does not improve code size
25812 if using it would require lr to be saved and if only saving one
25813 or two gprs. */
25814 else if (!lr_save_p && info->first_gp_reg_save > 29)
25815 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25817 /* We can only use load multiple or the out-of-line routines to
25818 restore gprs if we've saved all the registers from
25819 first_gp_reg_save. Otherwise, we risk loading garbage.
25820 Of course, if we have saved out-of-line or used stmw then we know
25821 we haven't skipped any gprs. */
25822 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
25823 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
25825 int i;
25827 for (i = info->first_gp_reg_save; i < 32; i++)
25828 if (fixed_reg_p (i) || !save_reg_p (i))
25830 strategy |= REST_INLINE_GPRS;
25831 strategy &= ~REST_MULTIPLE;
25832 break;
25836 if (TARGET_ELF && TARGET_64BIT)
25838 if (!(strategy & SAVE_INLINE_FPRS))
25839 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25840 else if (!(strategy & SAVE_INLINE_GPRS)
25841 && info->first_fp_reg_save == 64)
25842 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
25844 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
25845 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
25847 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
25848 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25850 return strategy;
25853 /* Calculate the stack information for the current function. This is
25854 complicated by having two separate calling sequences, the AIX calling
25855 sequence and the V.4 calling sequence.
25857 AIX (and Darwin/Mac OS X) stack frames look like:
25858 32-bit 64-bit
25859 SP----> +---------------------------------------+
25860 | back chain to caller | 0 0
25861 +---------------------------------------+
25862 | saved CR | 4 8 (8-11)
25863 +---------------------------------------+
25864 | saved LR | 8 16
25865 +---------------------------------------+
25866 | reserved for compilers | 12 24
25867 +---------------------------------------+
25868 | reserved for binders | 16 32
25869 +---------------------------------------+
25870 | saved TOC pointer | 20 40
25871 +---------------------------------------+
25872 | Parameter save area (P) | 24 48
25873 +---------------------------------------+
25874 | Alloca space (A) | 24+P etc.
25875 +---------------------------------------+
25876 | Local variable space (L) | 24+P+A
25877 +---------------------------------------+
25878 | Float/int conversion temporary (X) | 24+P+A+L
25879 +---------------------------------------+
25880 | Save area for AltiVec registers (W) | 24+P+A+L+X
25881 +---------------------------------------+
25882 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
25883 +---------------------------------------+
25884 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
25885 +---------------------------------------+
25886 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
25887 +---------------------------------------+
25888 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
25889 +---------------------------------------+
25890 old SP->| back chain to caller's caller |
25891 +---------------------------------------+
25893 The required alignment for AIX configurations is two words (i.e., 8
25894 or 16 bytes).
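For example, a 32-bit function with a 32-byte parameter save area
(P=32), no alloca (A=0), 16 bytes of locals (L=16) and no conversion
temporary or vector areas (X=W=Y=Z=0) places its GP register save
area at offset 24+32+16 = 72 from the new SP.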
25896 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
25898 SP----> +---------------------------------------+
25899 | Back chain to caller | 0
25900 +---------------------------------------+
25901 | Save area for CR | 8
25902 +---------------------------------------+
25903 | Saved LR | 16
25904 +---------------------------------------+
25905 | Saved TOC pointer | 24
25906 +---------------------------------------+
25907 | Parameter save area (P) | 32
25908 +---------------------------------------+
25909 | Alloca space (A) | 32+P
25910 +---------------------------------------+
25911 | Local variable space (L) | 32+P+A
25912 +---------------------------------------+
25913 | Save area for AltiVec registers (W) | 32+P+A+L
25914 +---------------------------------------+
25915 | AltiVec alignment padding (Y) | 32+P+A+L+W
25916 +---------------------------------------+
25917 | Save area for GP registers (G) | 32+P+A+L+W+Y
25918 +---------------------------------------+
25919 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
25920 +---------------------------------------+
25921 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
25922 +---------------------------------------+
25925 V.4 stack frames look like:
25927 SP----> +---------------------------------------+
25928 | back chain to caller | 0
25929 +---------------------------------------+
25930 | caller's saved LR | 4
25931 +---------------------------------------+
25932 | Parameter save area (P) | 8
25933 +---------------------------------------+
25934 | Alloca space (A) | 8+P
25935 +---------------------------------------+
25936 | Varargs save area (V) | 8+P+A
25937 +---------------------------------------+
25938 | Local variable space (L) | 8+P+A+V
25939 +---------------------------------------+
25940 | Float/int conversion temporary (X) | 8+P+A+V+L
25941 +---------------------------------------+
25942 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
25943 +---------------------------------------+
25944 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
25945 +---------------------------------------+
25946 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
25947 +---------------------------------------+
25948 | SPE: area for 64-bit GP registers |
25949 +---------------------------------------+
25950 | SPE alignment padding |
25951 +---------------------------------------+
25952 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
25953 +---------------------------------------+
25954 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
25955 +---------------------------------------+
25956 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
25957 +---------------------------------------+
25958 old SP->| back chain to caller's caller |
25959 +---------------------------------------+
25961 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
25962 given. (But note below and in sysv4.h that we require only 8 and
25963 may round up the size of our stack frame anyway. The historical
25964 reason is early versions of powerpc-linux which didn't properly
25965 align the stack at program startup. A happy side-effect is that
25966 -mno-eabi libraries can be used with -meabi programs.)
25968 The EABI configuration defaults to the V.4 layout. However,
25969 the stack alignment requirements may differ. If -mno-eabi is not
25970 given, the required stack alignment is 8 bytes; if -mno-eabi is
25971 given, the required alignment is 16 bytes. (But see V.4 comment
25972 above.) */
25974 #ifndef ABI_STACK_BOUNDARY
25975 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
25976 #endif
25978 static rs6000_stack_t *
25979 rs6000_stack_info (void)
25981 /* We should never be called for thunks; we are not set up for that. */
25982 gcc_assert (!cfun->is_thunk);
25984 rs6000_stack_t *info = &stack_info;
25985 int reg_size = TARGET_32BIT ? 4 : 8;
25986 int ehrd_size;
25987 int ehcr_size;
25988 int save_align;
25989 int first_gp;
25990 HOST_WIDE_INT non_fixed_size;
25991 bool using_static_chain_p;
25993 if (reload_completed && info->reload_completed)
25994 return info;
25996 memset (info, 0, sizeof (*info));
25997 info->reload_completed = reload_completed;
25999 if (TARGET_SPE)
26001 /* Cache value so we don't rescan instruction chain over and over. */
26002 if (cfun->machine->spe_insn_chain_scanned_p == 0)
26003 cfun->machine->spe_insn_chain_scanned_p
26004 = spe_func_has_64bit_regs_p () + 1;
26005 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
26008 /* Select which calling sequence. */
26009 info->abi = DEFAULT_ABI;
26011 /* Calculate which registers need to be saved & save area size. */
26012 info->first_gp_reg_save = first_reg_to_save ();
26013 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
26014 even if it currently looks like we won't. Reload may need it to
26015 get at a constant; if so, it will have already created a constant
26016 pool entry for it. */
26017 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
26018 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26019 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26020 && crtl->uses_const_pool
26021 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
26022 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
26023 else
26024 first_gp = info->first_gp_reg_save;
26026 info->gp_size = reg_size * (32 - first_gp);
26028 /* For the SPE, we have an additional upper 32-bits on each GPR.
26029 Ideally we should save the entire 64-bits only when the upper
26030 half is used in SIMD instructions. Since we only record whether
26031 registers are live (not the size at which they are used), this proves
26032 difficult because we'd have to traverse the instruction chain at
26033 the right time, taking reload into account. This is a real pain,
26034 so we opt to save the GPRs in 64-bits whenever even one register
26035 gets used in 64-bits. Otherwise, all the registers in the frame
26036 get saved in 32-bits.
26038 So, when we save all GPRs (except the SP) in 64-bits, the
26039 traditional GP save area will be empty. */
26040 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26041 info->gp_size = 0;
26043 info->first_fp_reg_save = first_fp_reg_to_save ();
26044 info->fp_size = 8 * (64 - info->first_fp_reg_save);
26046 info->first_altivec_reg_save = first_altivec_reg_to_save ();
26047 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
26048 - info->first_altivec_reg_save);
26050 /* Does this function call anything? */
26051 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
26053 /* Determine if we need to save the condition code registers. */
26054 if (save_reg_p (CR2_REGNO)
26055 || save_reg_p (CR3_REGNO)
26056 || save_reg_p (CR4_REGNO))
26058 info->cr_save_p = 1;
26059 if (DEFAULT_ABI == ABI_V4)
26060 info->cr_size = reg_size;
26063 /* If the current function calls __builtin_eh_return, then we need
26064 to allocate stack space for registers that will hold data for
26065 the exception handler. */
26066 if (crtl->calls_eh_return)
26068 unsigned int i;
26069 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
26070 continue;
26072 /* SPE saves EH registers in 64-bits. */
26073 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
26074 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
26076 else
26077 ehrd_size = 0;
26079 /* In the ELFv2 ABI, we also need to allocate space for separate
26080 CR field save areas if the function calls __builtin_eh_return. */
26081 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26083 /* This hard-codes that we have three call-saved CR fields. */
26084 ehcr_size = 3 * reg_size;
26085 /* We do *not* use the regular CR save mechanism. */
26086 info->cr_save_p = 0;
26088 else
26089 ehcr_size = 0;
26091 /* Determine various sizes. */
26092 info->reg_size = reg_size;
26093 info->fixed_size = RS6000_SAVE_AREA;
26094 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
26095 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
26096 TARGET_ALTIVEC ? 16 : 8);
26097 if (FRAME_GROWS_DOWNWARD)
26098 info->vars_size
26099 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
26100 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
26101 - (info->fixed_size + info->vars_size + info->parm_size);
26103 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26104 info->spe_gp_size = 8 * (32 - first_gp);
26106 if (TARGET_ALTIVEC_ABI)
26107 info->vrsave_mask = compute_vrsave_mask ();
26109 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
26110 info->vrsave_size = 4;
26112 compute_save_world_info (info);
26114 /* Calculate the offsets. */
26115 switch (DEFAULT_ABI)
26117 case ABI_NONE:
26118 default:
26119 gcc_unreachable ();
26121 case ABI_AIX:
26122 case ABI_ELFv2:
26123 case ABI_DARWIN:
26124 info->fp_save_offset = -info->fp_size;
26125 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26127 if (TARGET_ALTIVEC_ABI)
26129 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
26131 /* Align stack so vector save area is on a quadword boundary.
26132 The padding goes above the vectors. */
26133 if (info->altivec_size != 0)
26134 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
26136 info->altivec_save_offset = info->vrsave_save_offset
26137 - info->altivec_padding_size
26138 - info->altivec_size;
26139 gcc_assert (info->altivec_size == 0
26140 || info->altivec_save_offset % 16 == 0);
26142 /* Adjust for AltiVec case. */
26143 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
26145 else
26146 info->ehrd_offset = info->gp_save_offset - ehrd_size;
26148 info->ehcr_offset = info->ehrd_offset - ehcr_size;
26149 info->cr_save_offset = reg_size; /* first word when 64-bit. */
26150 info->lr_save_offset = 2*reg_size;
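/* 2*reg_size is 8 on 32-bit and 16 on 64-bit, matching the "saved LR"
   slot in the AIX/ELFv2 frame layouts pictured above. */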
26151 break;
26153 case ABI_V4:
26154 info->fp_save_offset = -info->fp_size;
26155 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26156 info->cr_save_offset = info->gp_save_offset - info->cr_size;
26158 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26160 /* Align stack so SPE GPR save area is aligned on a
26161 double-word boundary. */
26162 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
26163 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
26164 else
26165 info->spe_padding_size = 0;
26167 info->spe_gp_save_offset = info->cr_save_offset
26168 - info->spe_padding_size
26169 - info->spe_gp_size;
26171 /* Adjust for SPE case. */
26172 info->ehrd_offset = info->spe_gp_save_offset;
26174 else if (TARGET_ALTIVEC_ABI)
26176 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
26178 /* Align stack so vector save area is on a quadword boundary. */
26179 if (info->altivec_size != 0)
26180 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
26182 info->altivec_save_offset = info->vrsave_save_offset
26183 - info->altivec_padding_size
26184 - info->altivec_size;
26186 /* Adjust for AltiVec case. */
26187 info->ehrd_offset = info->altivec_save_offset;
26189 else
26190 info->ehrd_offset = info->cr_save_offset;
26192 info->ehrd_offset -= ehrd_size;
26193 info->lr_save_offset = reg_size;
26196 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
26197 info->save_size = RS6000_ALIGN (info->fp_size
26198 + info->gp_size
26199 + info->altivec_size
26200 + info->altivec_padding_size
26201 + info->spe_gp_size
26202 + info->spe_padding_size
26203 + ehrd_size
26204 + ehcr_size
26205 + info->cr_size
26206 + info->vrsave_size,
26207 save_align);
26209 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
26211 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
26212 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
26214 /* Determine if we need to save the link register. */
26215 if (info->calls_p
26216 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26217 && crtl->profile
26218 && !TARGET_PROFILE_KERNEL)
26219 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
26220 #ifdef TARGET_RELOCATABLE
26221 || (DEFAULT_ABI == ABI_V4
26222 && (TARGET_RELOCATABLE || flag_pic > 1)
26223 && get_pool_size () != 0)
26224 #endif
26225 || rs6000_ra_ever_killed ())
26226 info->lr_save_p = 1;
26228 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26229 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26230 && call_used_regs[STATIC_CHAIN_REGNUM]);
26231 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
26233 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
26234 || !(info->savres_strategy & SAVE_INLINE_FPRS)
26235 || !(info->savres_strategy & SAVE_INLINE_VRS)
26236 || !(info->savres_strategy & REST_INLINE_GPRS)
26237 || !(info->savres_strategy & REST_INLINE_FPRS)
26238 || !(info->savres_strategy & REST_INLINE_VRS))
26239 info->lr_save_p = 1;
26241 if (info->lr_save_p)
26242 df_set_regs_ever_live (LR_REGNO, true);
26244 /* Determine if we need to allocate any stack frame:
26246 For AIX we need to push the stack if a frame pointer is needed
26247 (because the stack might be dynamically adjusted), if we are
26248 debugging, if we make calls, or if the sum of fp_save, gp_save,
26249 and local variables are more than the space needed to save all
26250 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
26251 + 18*8 = 288 (GPR13 reserved).
26253 For V.4 we don't have the stack cushion that AIX uses, but assume
26254 that the debugger can handle stackless frames. */
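/* For example, a 32-bit AIX leaf function that makes no calls, needs no
   frame pointer and is compiled without debug info keeps push_p clear as
   long as non_fixed_size stays within the 220-byte cushion below. */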
26256 if (info->calls_p)
26257 info->push_p = 1;
26259 else if (DEFAULT_ABI == ABI_V4)
26260 info->push_p = non_fixed_size != 0;
26262 else if (frame_pointer_needed)
26263 info->push_p = 1;
26265 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
26266 info->push_p = 1;
26268 else
26269 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
26271 return info;
26274 /* Return true if the current function uses any GPRs in 64-bit SIMD
26275 mode. */
26277 static bool
26278 spe_func_has_64bit_regs_p (void)
26280 rtx_insn *insns, *insn;
26282 /* Functions that save and restore all the call-saved registers will
26283 need to save/restore the registers in 64-bits. */
26284 if (crtl->calls_eh_return
26285 || cfun->calls_setjmp
26286 || crtl->has_nonlocal_goto)
26287 return true;
26289 insns = get_insns ();
26291 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
26293 if (INSN_P (insn))
26295 rtx i;
26297 /* FIXME: This should be implemented with attributes...
26299 (set_attr "spe64" "true")....then,
26300 if (get_spe64(insn)) return true;
26302 It's the only reliable way to do the stuff below. */
26304 i = PATTERN (insn);
26305 if (GET_CODE (i) == SET)
26307 machine_mode mode = GET_MODE (SET_SRC (i));
26309 if (SPE_VECTOR_MODE (mode))
26310 return true;
26311 if (TARGET_E500_DOUBLE
26312 && (mode == DFmode || FLOAT128_2REG_P (mode)))
26313 return true;
26318 return false;
26321 static void
26322 debug_stack_info (rs6000_stack_t *info)
26324 const char *abi_string;
26326 if (! info)
26327 info = rs6000_stack_info ();
26329 fprintf (stderr, "\nStack information for function %s:\n",
26330 ((current_function_decl && DECL_NAME (current_function_decl))
26331 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
26332 : "<unknown>"));
26334 switch (info->abi)
26336 default: abi_string = "Unknown"; break;
26337 case ABI_NONE: abi_string = "NONE"; break;
26338 case ABI_AIX: abi_string = "AIX"; break;
26339 case ABI_ELFv2: abi_string = "ELFv2"; break;
26340 case ABI_DARWIN: abi_string = "Darwin"; break;
26341 case ABI_V4: abi_string = "V.4"; break;
26344 fprintf (stderr, "\tABI = %5s\n", abi_string);
26346 if (TARGET_ALTIVEC_ABI)
26347 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
26349 if (TARGET_SPE_ABI)
26350 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
26352 if (info->first_gp_reg_save != 32)
26353 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
26355 if (info->first_fp_reg_save != 64)
26356 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
26358 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
26359 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
26360 info->first_altivec_reg_save);
26362 if (info->lr_save_p)
26363 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
26365 if (info->cr_save_p)
26366 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
26368 if (info->vrsave_mask)
26369 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
26371 if (info->push_p)
26372 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
26374 if (info->calls_p)
26375 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
26377 if (info->gp_size)
26378 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
26380 if (info->fp_size)
26381 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
26383 if (info->altivec_size)
26384 fprintf (stderr, "\taltivec_save_offset = %5d\n",
26385 info->altivec_save_offset);
26387 if (info->spe_gp_size)
26388 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
26389 info->spe_gp_save_offset);
26391 if (info->vrsave_size)
26392 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
26393 info->vrsave_save_offset);
26395 if (info->lr_save_p)
26396 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
26398 if (info->cr_save_p)
26399 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
26401 if (info->varargs_save_offset)
26402 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
26404 if (info->total_size)
26405 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26406 info->total_size);
26408 if (info->vars_size)
26409 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26410 info->vars_size);
26412 if (info->parm_size)
26413 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
26415 if (info->fixed_size)
26416 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
26418 if (info->gp_size)
26419 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
26421 if (info->spe_gp_size)
26422 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
26424 if (info->fp_size)
26425 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
26427 if (info->altivec_size)
26428 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
26430 if (info->vrsave_size)
26431 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
26433 if (info->altivec_padding_size)
26434 fprintf (stderr, "\taltivec_padding_size= %5d\n",
26435 info->altivec_padding_size);
26437 if (info->spe_padding_size)
26438 fprintf (stderr, "\tspe_padding_size = %5d\n",
26439 info->spe_padding_size);
26441 if (info->cr_size)
26442 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
26444 if (info->save_size)
26445 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
26447 if (info->reg_size != 4)
26448 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
26450 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
26452 fprintf (stderr, "\n");
26455 rtx
26456 rs6000_return_addr (int count, rtx frame)
26458 /* Currently we don't optimize very well between prolog and body
26459 code, and for PIC code the result can actually be quite bad, so
26460 don't try to be too clever here. */
26461 if (count != 0
26462 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
26464 cfun->machine->ra_needs_full_frame = 1;
26466 return
26467 gen_rtx_MEM
26468 (Pmode,
26469 memory_address
26470 (Pmode,
26471 plus_constant (Pmode,
26472 copy_to_reg
26473 (gen_rtx_MEM (Pmode,
26474 memory_address (Pmode, frame))),
26475 RETURN_ADDRESS_OFFSET)));
26478 cfun->machine->ra_need_lr = 1;
26479 return get_hard_reg_initial_val (Pmode, LR_REGNO);
26482 /* Say whether a function is a candidate for sibcall handling or not. */
26484 static bool
26485 rs6000_function_ok_for_sibcall (tree decl, tree exp)
26487 tree fntype;
26489 if (decl)
26490 fntype = TREE_TYPE (decl);
26491 else
26492 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
26494 /* We can't do it if the called function has more vector parameters
26495 than the current function; there's nowhere to put the VRsave code. */
26496 if (TARGET_ALTIVEC_ABI
26497 && TARGET_ALTIVEC_VRSAVE
26498 && !(decl && decl == current_function_decl))
26500 function_args_iterator args_iter;
26501 tree type;
26502 int nvreg = 0;
26504 /* Functions with vector parameters are required to have a
26505 prototype, so the argument type info must be available
26506 here. */
26507 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
26508 if (TREE_CODE (type) == VECTOR_TYPE
26509 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26510 nvreg++;
26512 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
26513 if (TREE_CODE (type) == VECTOR_TYPE
26514 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26515 nvreg--;
26517 if (nvreg > 0)
26518 return false;
26521 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
26522 functions, because the callee may have a different TOC pointer to
26523 the caller and there's no way to ensure we restore the TOC when
26524 we return. With the secure-plt SYSV ABI we can't make non-local
26525 calls when -fpic/PIC because the plt call stubs use r30. */
26526 if (DEFAULT_ABI == ABI_DARWIN
26527 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26528 && decl
26529 && !DECL_EXTERNAL (decl)
26530 && !DECL_WEAK (decl)
26531 && (*targetm.binds_local_p) (decl))
26532 || (DEFAULT_ABI == ABI_V4
26533 && (!TARGET_SECURE_PLT
26534 || !flag_pic
26535 || (decl
26536 && (*targetm.binds_local_p) (decl)))))
26538 tree attr_list = TYPE_ATTRIBUTES (fntype);
26540 if (!lookup_attribute ("longcall", attr_list)
26541 || lookup_attribute ("shortcall", attr_list))
26542 return true;
26545 return false;
26548 static int
26549 rs6000_ra_ever_killed (void)
26551 rtx_insn *top;
26552 rtx reg;
26553 rtx_insn *insn;
26555 if (cfun->is_thunk)
26556 return 0;
26558 if (cfun->machine->lr_save_state)
26559 return cfun->machine->lr_save_state - 1;
26561 /* regs_ever_live has LR marked as used if any sibcalls are present,
26562 but this should not force saving and restoring in the
26563 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
26564 clobbers LR, so that is inappropriate. */
26566 /* Also, the prologue can generate a store into LR that
26567 doesn't really count, like this:
26569 move LR->R0
26570 bcl to set PIC register
26571 move LR->R31
26572 move R0->LR
26574 When we're called from the epilogue, we need to avoid counting
26575 this as a store. */
26577 push_topmost_sequence ();
26578 top = get_insns ();
26579 pop_topmost_sequence ();
26580 reg = gen_rtx_REG (Pmode, LR_REGNO);
26582 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
26584 if (INSN_P (insn))
26586 if (CALL_P (insn))
26588 if (!SIBLING_CALL_P (insn))
26589 return 1;
26591 else if (find_regno_note (insn, REG_INC, LR_REGNO))
26592 return 1;
26593 else if (set_of (reg, insn) != NULL_RTX
26594 && !prologue_epilogue_contains (insn))
26595 return 1;
26598 return 0;
26601 /* Emit instructions needed to load the TOC register.
26602 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
26603 a constant pool; or for SVR4 -fpic. */
26605 void
26606 rs6000_emit_load_toc_table (int fromprolog)
26608 rtx dest;
26609 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26611 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
26613 char buf[30];
26614 rtx lab, tmp1, tmp2, got;
26616 lab = gen_label_rtx ();
26617 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
26618 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26619 if (flag_pic == 2)
26621 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26622 need_toc_init = 1;
26624 else
26625 got = rs6000_got_sym ();
26626 tmp1 = tmp2 = dest;
26627 if (!fromprolog)
26629 tmp1 = gen_reg_rtx (Pmode);
26630 tmp2 = gen_reg_rtx (Pmode);
26632 emit_insn (gen_load_toc_v4_PIC_1 (lab));
26633 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
26634 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
26635 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
26637 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
26639 emit_insn (gen_load_toc_v4_pic_si ());
26640 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26642 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
26644 char buf[30];
26645 rtx temp0 = (fromprolog
26646 ? gen_rtx_REG (Pmode, 0)
26647 : gen_reg_rtx (Pmode));
26649 if (fromprolog)
26651 rtx symF, symL;
26653 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26654 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26656 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26657 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26659 emit_insn (gen_load_toc_v4_PIC_1 (symF));
26660 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26661 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
26663 else
26665 rtx tocsym, lab;
26667 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26668 need_toc_init = 1;
26669 lab = gen_label_rtx ();
26670 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
26671 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26672 if (TARGET_LINK_STACK)
26673 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
26674 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
26676 emit_insn (gen_addsi3 (dest, temp0, dest));
26678 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
26680 /* This is for AIX code running in non-PIC ELF32. */
26681 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26683 need_toc_init = 1;
26684 emit_insn (gen_elf_high (dest, realsym));
26685 emit_insn (gen_elf_low (dest, dest, realsym));
26687 else
26689 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26691 if (TARGET_32BIT)
26692 emit_insn (gen_load_toc_aix_si (dest));
26693 else
26694 emit_insn (gen_load_toc_aix_di (dest));
26698 /* Emit instructions to restore the link register after determining where
26699 its value has been stored. */
26701 void
26702 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
26704 rs6000_stack_t *info = rs6000_stack_info ();
26705 rtx operands[2];
26707 operands[0] = source;
26708 operands[1] = scratch;
26710 if (info->lr_save_p)
26712 rtx frame_rtx = stack_pointer_rtx;
26713 HOST_WIDE_INT sp_offset = 0;
26714 rtx tmp;
26716 if (frame_pointer_needed
26717 || cfun->calls_alloca
26718 || info->total_size > 32767)
26720 tmp = gen_frame_mem (Pmode, frame_rtx);
26721 emit_move_insn (operands[1], tmp);
26722 frame_rtx = operands[1];
26724 else if (info->push_p)
26725 sp_offset = info->total_size;
26727 tmp = plus_constant (Pmode, frame_rtx,
26728 info->lr_save_offset + sp_offset);
26729 tmp = gen_frame_mem (Pmode, tmp);
26730 emit_move_insn (tmp, operands[0]);
26732 else
26733 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
26735 /* Freeze lr_save_p. We've just emitted rtl that depends on the
26736 state of lr_save_p so any change from here on would be a bug. In
26737 particular, stop rs6000_ra_ever_killed from considering the SET
26738 of lr we may have added just above. */
26739 cfun->machine->lr_save_state = info->lr_save_p + 1;
26742 static GTY(()) alias_set_type set = -1;
26744 alias_set_type
26745 get_TOC_alias_set (void)
26747 if (set == -1)
26748 set = new_alias_set ();
26749 return set;
26752 /* This returns nonzero if the current function uses the TOC. This is
26753 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
26754 is generated by the ABI_V4 load_toc_* patterns. */
26755 #if TARGET_ELF
26756 static int
26757 uses_TOC (void)
26759 rtx_insn *insn;
26761 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26762 if (INSN_P (insn))
26764 rtx pat = PATTERN (insn);
26765 int i;
26767 if (GET_CODE (pat) == PARALLEL)
26768 for (i = 0; i < XVECLEN (pat, 0); i++)
26770 rtx sub = XVECEXP (pat, 0, i);
26771 if (GET_CODE (sub) == USE)
26773 sub = XEXP (sub, 0);
26774 if (GET_CODE (sub) == UNSPEC
26775 && XINT (sub, 1) == UNSPEC_TOC)
26776 return 1;
26780 return 0;
26782 #endif
26784 rtx
26785 create_TOC_reference (rtx symbol, rtx largetoc_reg)
26787 rtx tocrel, tocreg, hi;
26789 if (TARGET_DEBUG_ADDR)
26791 if (GET_CODE (symbol) == SYMBOL_REF)
26792 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
26793 XSTR (symbol, 0));
26794 else
26796 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
26797 GET_RTX_NAME (GET_CODE (symbol)));
26798 debug_rtx (symbol);
26802 if (!can_create_pseudo_p ())
26803 df_set_regs_ever_live (TOC_REGISTER, true);
26805 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
26806 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
26807 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
26808 return tocrel;
26810 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
26811 if (largetoc_reg != NULL)
26813 emit_move_insn (largetoc_reg, hi);
26814 hi = largetoc_reg;
26816 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
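/* When the small TOC model does not apply, the UNSPEC_TOCREL above is
   wrapped in a HIGH/LO_SUM pair, so the address is materialized in two
   instructions; LARGETOC_REG, when supplied, carries the HIGH part. */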
26819 /* Issue assembly directives that create a reference to the given DWARF
26820 FRAME_TABLE_LABEL from the current function section. */
26821 void
26822 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
26824 fprintf (asm_out_file, "\t.ref %s\n",
26825 (* targetm.strip_name_encoding) (frame_table_label));
26828 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
26829 and the change to the stack pointer. */
26831 static void
26832 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
26834 rtvec p;
26835 int i;
26836 rtx regs[3];
26838 i = 0;
26839 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26840 if (hard_frame_needed)
26841 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
26842 if (!(REGNO (fp) == STACK_POINTER_REGNUM
26843 || (hard_frame_needed
26844 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
26845 regs[i++] = fp;
26847 p = rtvec_alloc (i);
26848 while (--i >= 0)
26850 rtx mem = gen_frame_mem (BLKmode, regs[i]);
26851 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
26854 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
26857 /* Emit the correct code for allocating stack space, as insns.
26858 If COPY_REG, make sure a copy of the old stack pointer is left in it.
26859 The generated code may use hard register 0 as a temporary. */
26861 static rtx_insn *
26862 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
26864 rtx_insn *insn;
26865 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26866 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
26867 rtx todec = gen_int_mode (-size, Pmode);
26868 rtx par, set, mem;
26870 if (INTVAL (todec) != -size)
26872 warning (0, "stack frame too large");
26873 emit_insn (gen_trap ());
26874 return 0;
26877 if (crtl->limit_stack)
26879 if (REG_P (stack_limit_rtx)
26880 && REGNO (stack_limit_rtx) > 1
26881 && REGNO (stack_limit_rtx) <= 31)
26883 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
26884 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26885 const0_rtx));
26887 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
26888 && TARGET_32BIT
26889 && DEFAULT_ABI == ABI_V4)
26891 rtx toload = gen_rtx_CONST (VOIDmode,
26892 gen_rtx_PLUS (Pmode,
26893 stack_limit_rtx,
26894 GEN_INT (size)));
26896 emit_insn (gen_elf_high (tmp_reg, toload));
26897 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
26898 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26899 const0_rtx));
26901 else
26902 warning (0, "stack limit expression is not supported");
26905 if (copy_reg)
26907 if (copy_off != 0)
26908 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
26909 else
26910 emit_move_insn (copy_reg, stack_reg);
26913 if (size > 32767)
26915 /* Need a note here so that try_split doesn't get confused. */
26916 if (get_last_insn () == NULL_RTX)
26917 emit_note (NOTE_INSN_DELETED);
26918 insn = emit_move_insn (tmp_reg, todec);
26919 try_split (PATTERN (insn), insn, 0);
26920 todec = tmp_reg;
26923 insn = emit_insn (TARGET_32BIT
26924 ? gen_movsi_update_stack (stack_reg, stack_reg,
26925 todec, stack_reg)
26926 : gen_movdi_di_update_stack (stack_reg, stack_reg,
26927 todec, stack_reg));
26928 /* Since we didn't use gen_frame_mem to generate the MEM, grab
26929 it now and set the alias set/attributes. The above gen_*_update
26930 calls will generate a PARALLEL with the MEM set being the first
26931 operation. */
26932 par = PATTERN (insn);
26933 gcc_assert (GET_CODE (par) == PARALLEL);
26934 set = XVECEXP (par, 0, 0);
26935 gcc_assert (GET_CODE (set) == SET);
26936 mem = SET_DEST (set);
26937 gcc_assert (MEM_P (mem));
26938 MEM_NOTRAP_P (mem) = 1;
26939 set_mem_alias_set (mem, get_frame_alias_set ());
26941 RTX_FRAME_RELATED_P (insn) = 1;
26942 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26943 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
26944 GEN_INT (-size))));
26945 return insn;
26948 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
26950 #if PROBE_INTERVAL > 32768
26951 #error Cannot use indexed addressing mode for stack probing
26952 #endif
26954 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
26955 inclusive. These are offsets from the current stack pointer. */
26957 static void
26958 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
26960 /* See if we have a constant small number of probes to generate. If so,
26961 that's the easy case. */
26962 if (first + size <= 32768)
26964 HOST_WIDE_INT i;
26966 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
26967 it exceeds SIZE. If only one probe is needed, this will not
26968 generate any code. Then probe at FIRST + SIZE. */
26969 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
26970 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26971 -(first + i)));
26973 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26974 -(first + size)));
26977 /* Otherwise, do the same as above, but in a loop. Note that we must be
26978 extra careful with variables wrapping around because we might be at
26979 the very top (or the very bottom) of the address space and we have
26980 to be able to handle this case properly; in particular, we use an
26981 equality test for the loop condition. */
26982 else
26984 HOST_WIDE_INT rounded_size;
26985 rtx r12 = gen_rtx_REG (Pmode, 12);
26986 rtx r0 = gen_rtx_REG (Pmode, 0);
26988 /* Sanity check for the addressing mode we're going to use. */
26989 gcc_assert (first <= 32768);
26991 /* Step 1: round SIZE to the previous multiple of the interval. */
26993 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
26996 /* Step 2: compute initial and final value of the loop counter. */
26998 /* TEST_ADDR = SP + FIRST. */
26999 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
27000 -first)));
27002 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
27003 if (rounded_size > 32768)
27005 emit_move_insn (r0, GEN_INT (-rounded_size));
27006 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
27008 else
27009 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
27010 -rounded_size)));
27013 /* Step 3: the loop
27017 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
27018 probe at TEST_ADDR
27020 while (TEST_ADDR != LAST_ADDR)
27022 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
27023 until it is equal to ROUNDED_SIZE. */
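/* For example, with PROBE_INTERVAL == 4096, FIRST == 0 and SIZE == 40000,
   rounded_size is 36864, the loop probes at sp-4096, sp-8192, ...,
   sp-36864, and step 4 below adds a final probe at sp-40000. */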
27025 if (TARGET_64BIT)
27026 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
27027 else
27028 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
27031 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
27032 that SIZE is equal to ROUNDED_SIZE. */
27034 if (size != rounded_size)
27035 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
27039 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
27040 absolute addresses. */
27042 const char *
27043 output_probe_stack_range (rtx reg1, rtx reg2)
27045 static int labelno = 0;
27046 char loop_lab[32];
27047 rtx xops[2];
27049 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
27051 /* Loop. */
27052 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
27054 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
27055 xops[0] = reg1;
27056 xops[1] = GEN_INT (-PROBE_INTERVAL);
27057 output_asm_insn ("addi %0,%0,%1", xops);
27059 /* Probe at TEST_ADDR. */
27060 xops[1] = gen_rtx_REG (Pmode, 0);
27061 output_asm_insn ("stw %1,0(%0)", xops);
27063 /* Test if TEST_ADDR == LAST_ADDR. */
27064 xops[1] = reg2;
27065 if (TARGET_64BIT)
27066 output_asm_insn ("cmpd 0,%0,%1", xops);
27067 else
27068 output_asm_insn ("cmpw 0,%0,%1", xops);
27070 /* Branch. */
27071 fputs ("\tbne 0,", asm_out_file);
27072 assemble_name_raw (asm_out_file, loop_lab);
27073 fputc ('\n', asm_out_file);
27075 return "";
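/* A sketch of the 32-bit loop this emits, assuming the r12/r0 pairing
   used by rs6000_emit_probe_stack_range and PROBE_INTERVAL == 4096:

   .LPSRL0:
	addi 12,12,-4096
	stw 0,0(12)
	cmpw 0,12,0
	bne 0,.LPSRL0  */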
27078 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
27079 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
27080 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
27081 deduce these equivalences by itself so it wasn't necessary to hold
27082 its hand so much. Don't be tempted to always supply d2_f_d_e with
27083 the actual cfa register, i.e. r31 when we are using a hard frame
27084 pointer. That fails when saving regs off r1, and sched moves the
27085 r31 setup past the reg saves. */
27087 static rtx_insn *
27088 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
27089 rtx reg2, rtx repl2)
27091 rtx repl;
27093 if (REGNO (reg) == STACK_POINTER_REGNUM)
27095 gcc_checking_assert (val == 0);
27096 repl = NULL_RTX;
27098 else
27099 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27100 GEN_INT (val));
27102 rtx pat = PATTERN (insn);
27103 if (!repl && !reg2)
27105 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
27106 if (GET_CODE (pat) == PARALLEL)
27107 for (int i = 0; i < XVECLEN (pat, 0); i++)
27108 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27110 rtx set = XVECEXP (pat, 0, i);
27112 /* If this PARALLEL has been emitted for out-of-line
27113 register save functions, or store multiple, then omit
27114 eh_frame info for any user-defined global regs. If
27115 eh_frame info is supplied, frame unwinding will
27116 restore a user reg. */
27117 if (!REG_P (SET_SRC (set))
27118 || !fixed_reg_p (REGNO (SET_SRC (set))))
27119 RTX_FRAME_RELATED_P (set) = 1;
27121 RTX_FRAME_RELATED_P (insn) = 1;
27122 return insn;
27125 /* We expect that 'pat' is either a SET or a PARALLEL containing
27126 SETs (and possibly other stuff). In a PARALLEL, all the SETs
27127 are important so they all have to be marked RTX_FRAME_RELATED_P.
27128 Call simplify_replace_rtx on the SETs rather than the whole insn
27129 so as to leave the other stuff alone (for example USE of r12). */
27131 if (GET_CODE (pat) == SET)
27133 if (repl)
27134 pat = simplify_replace_rtx (pat, reg, repl);
27135 if (reg2)
27136 pat = simplify_replace_rtx (pat, reg2, repl2);
27138 else if (GET_CODE (pat) == PARALLEL)
27140 pat = shallow_copy_rtx (pat);
27141 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
27143 for (int i = 0; i < XVECLEN (pat, 0); i++)
27144 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27146 rtx set = XVECEXP (pat, 0, i);
27148 if (repl)
27149 set = simplify_replace_rtx (set, reg, repl);
27150 if (reg2)
27151 set = simplify_replace_rtx (set, reg2, repl2);
27152 XVECEXP (pat, 0, i) = set;
27154 /* Omit eh_frame info for any user-defined global regs. */
27155 if (!REG_P (SET_SRC (set))
27156 || !fixed_reg_p (REGNO (SET_SRC (set))))
27157 RTX_FRAME_RELATED_P (set) = 1;
27160 else
27161 gcc_unreachable ();
27163 RTX_FRAME_RELATED_P (insn) = 1;
27164 if (repl || reg2)
27165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
27167 return insn;
27170 /* Returns an insn that has a vrsave set operation with the
27171 appropriate CLOBBERs. */
27173 static rtx
27174 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
27176 int nclobs, i;
27177 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
27178 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27180 clobs[0]
27181 = gen_rtx_SET (vrsave,
27182 gen_rtx_UNSPEC_VOLATILE (SImode,
27183 gen_rtvec (2, reg, vrsave),
27184 UNSPECV_SET_VRSAVE));
27186 nclobs = 1;
27188 /* We need to clobber the registers in the mask so the scheduler
27189 does not move sets to VRSAVE before sets of AltiVec registers.
27191 However, if the function receives nonlocal gotos, reload will set
27192 all call saved registers live. We will end up with:
27194 (set (reg 999) (mem))
27195 (parallel [ (set (reg vrsave) (unspec blah))
27196 (clobber (reg 999))])
27198 The clobber will cause the store into reg 999 to be dead, and
27199 flow will attempt to delete an epilogue insn. In this case, we
27200 need an unspec use/set of the register. */
27202 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27203 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27205 if (!epiloguep || call_used_regs [i])
27206 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
27207 gen_rtx_REG (V4SImode, i));
27208 else
27210 rtx reg = gen_rtx_REG (V4SImode, i);
27212 clobs[nclobs++]
27213 = gen_rtx_SET (reg,
27214 gen_rtx_UNSPEC (V4SImode,
27215 gen_rtvec (1, reg), 27));
27219 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
27221 for (i = 0; i < nclobs; ++i)
27222 XVECEXP (insn, 0, i) = clobs[i];
27224 return insn;
27227 static rtx
27228 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
27230 rtx addr, mem;
27232 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
27233 mem = gen_frame_mem (GET_MODE (reg), addr);
27234 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
27237 static rtx
27238 gen_frame_load (rtx reg, rtx frame_reg, int offset)
27240 return gen_frame_set (reg, frame_reg, offset, false);
27243 static rtx
27244 gen_frame_store (rtx reg, rtx frame_reg, int offset)
27246 return gen_frame_set (reg, frame_reg, offset, true);
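/* A small usage sketch: on a 64-bit target,
   gen_frame_store (gen_rtx_REG (DImode, 0), stack_pointer_rtx, 16)
   builds (set (mem:DI (plus:DI r1 16)) (reg:DI 0)), a store of r0 at
   offset 16 from the stack pointer, with the frame alias set already
   applied by gen_frame_mem. */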
27249 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
27250 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
27252 static rtx_insn *
27253 emit_frame_save (rtx frame_reg, machine_mode mode,
27254 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
27256 rtx reg;
27258 /* Reject the cases that would need register indexed addressing. */
27259 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
27260 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
27261 || (TARGET_E500_DOUBLE && mode == DFmode)
27262 || (TARGET_SPE_ABI
27263 && SPE_VECTOR_MODE (mode)
27264 && !SPE_CONST_OFFSET_OK (offset))));
27266 reg = gen_rtx_REG (mode, regno);
27267 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
27268 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
27269 NULL_RTX, NULL_RTX);
27272 /* Emit an offset memory reference suitable for a frame store, while
27273 converting to a valid addressing mode. */
27275 static rtx
27276 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
27278 rtx int_rtx, offset_rtx;
27280 int_rtx = GEN_INT (offset);
27282 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
27283 || (TARGET_E500_DOUBLE && mode == DFmode))
27285 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
27286 emit_move_insn (offset_rtx, int_rtx);
27288 else
27289 offset_rtx = int_rtx;
27291 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
27294 #ifndef TARGET_FIX_AND_CONTINUE
27295 #define TARGET_FIX_AND_CONTINUE 0
27296 #endif
27298 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
27299 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
27300 #define LAST_SAVRES_REGISTER 31
27301 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
27303 enum {
27304 SAVRES_LR = 0x1,
27305 SAVRES_SAVE = 0x2,
27306 SAVRES_REG = 0x0c,
27307 SAVRES_GPR = 0,
27308 SAVRES_FPR = 4,
27309 SAVRES_VR = 8
27312 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
27314 /* Temporary holding space for an out-of-line register save/restore
27315 routine name. */
27316 static char savres_routine_name[30];
27318 /* Return the name for an out-of-line register save/restore routine.
27319 SEL encodes the register class, save/restore direction and LR handling via the SAVRES_* flags. */
27321 static char *
27322 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
27324 const char *prefix = "";
27325 const char *suffix = "";
27327 /* Different targets are supposed to define
27328 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
27329 routine name could be defined with:
27331 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
27333 This is a nice idea in theory, but in reality, things are
27334 complicated in several ways:
27336 - ELF targets have save/restore routines for GPRs.
27338 - SPE targets use different prefixes for 32/64-bit registers, and
27339 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
27341 - PPC64 ELF targets have routines for save/restore of GPRs that
27342 differ in what they do with the link register, so having a set
27343 prefix doesn't work. (We only use one of the save routines at
27344 the moment, though.)
27346 - PPC32 ELF targets have "exit" versions of the restore routines
27347 that restore the link register and can save some extra space.
27348 These require an extra suffix. (There are also "tail" versions
27349 of the restore routines and "GOT" versions of the save routines,
27350 but we don't generate those at present. Same problems apply,
27351 though.)
27353 We deal with all this by synthesizing our own prefix/suffix and
27354 using that for the simple sprintf call shown above. */
27355 if (TARGET_SPE)
27357 /* No floating point saves on the SPE. */
27358 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
27360 if ((sel & SAVRES_SAVE))
27361 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
27362 else
27363 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
27365 if ((sel & SAVRES_LR))
27366 suffix = "_x";
27368 else if (DEFAULT_ABI == ABI_V4)
27370 if (TARGET_64BIT)
27371 goto aix_names;
27373 if ((sel & SAVRES_REG) == SAVRES_GPR)
27374 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
27375 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27376 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
27377 else if ((sel & SAVRES_REG) == SAVRES_VR)
27378 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27379 else
27380 abort ();
27382 if ((sel & SAVRES_LR))
27383 suffix = "_x";
27385 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27387 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
27388 /* No out-of-line save/restore routines for GPRs on AIX. */
27389 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
27390 #endif
27392 aix_names:
27393 if ((sel & SAVRES_REG) == SAVRES_GPR)
27394 prefix = ((sel & SAVRES_SAVE)
27395 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
27396 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
27397 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27399 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27400 if ((sel & SAVRES_LR))
27401 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
27402 else
27403 #endif
27405 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
27406 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
27409 else if ((sel & SAVRES_REG) == SAVRES_VR)
27410 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27411 else
27412 abort ();
27415 if (DEFAULT_ABI == ABI_DARWIN)
27417 /* The Darwin approach is (slightly) different, in order to be
27418 compatible with code generated by the system toolchain. There is a
27419 single symbol for the start of save sequence, and the code here
27420 embeds an offset into that code on the basis of the first register
27421 to be saved. */
27422 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
27423 if ((sel & SAVRES_REG) == SAVRES_GPR)
27424 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
27425 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
27426 (regno - 13) * 4, prefix, regno);
27427 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27428 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
27429 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
27430 else if ((sel & SAVRES_REG) == SAVRES_VR)
27431 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
27432 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
27433 else
27434 abort ();
27436 else
27437 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
27439 return savres_routine_name;
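/* A couple of worked examples (editorial; the exact spellings follow
   from the code above): on 64-bit ELF, saving GPRs from r29 with the
   LR-saving variant yields "_savegpr0_29"; on 32-bit SVR4, the "exit"
   restore of FPRs from f14 yields "_restfpr_14_x".  */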
27442 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
27443 SEL encodes the register class, save/restore direction and LR handling via the SAVRES_* flags. */
27445 static rtx
27446 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
27448 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
27449 ? info->first_gp_reg_save
27450 : (sel & SAVRES_REG) == SAVRES_FPR
27451 ? info->first_fp_reg_save - 32
27452 : (sel & SAVRES_REG) == SAVRES_VR
27453 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
27454 : -1);
27455 rtx sym;
27456 int select = sel;
27458 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
27459 versions of the gpr routines. */
27460 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
27461 && info->spe_64bit_regs_used)
27462 select ^= SAVRES_FPR ^ SAVRES_GPR;
27464 /* Don't generate bogus routine names. */
27465 gcc_assert (FIRST_SAVRES_REGISTER <= regno
27466 && regno <= LAST_SAVRES_REGISTER
27467 && select >= 0 && select <= 12);
27469 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
27471 if (sym == NULL)
27473 char *name;
27475 name = rs6000_savres_routine_name (info, regno, sel);
27477 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
27478 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
27479 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
27482 return sym;
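/* Editorial note on the cache indexing above: SELECT packs the SAVRES_*
   flags -- LR in bit 0, SAVE in bit 1, register class in bits 2-3 -- so
   for instance SAVRES_SAVE | SAVRES_FPR | SAVRES_LR == 7 selects the
   FPR save-with-LR routine, and all valid indices stay below 12.  */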
27485 /* Emit a sequence of insns, including a stack tie if needed, for
27486 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
27487 reset the stack pointer, but move the base of the frame into
27488 reg UPDT_REGNO for use by out-of-line register restore routines. */
27490 static rtx
27491 rs6000_emit_stack_reset (rs6000_stack_t *info,
27492 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
27493 unsigned updt_regno)
27495 rtx updt_reg_rtx;
27497 /* This blockage is needed so that sched doesn't decide to move
27498 the sp change before the register restores. */
27499 if (DEFAULT_ABI == ABI_V4
27500 || (TARGET_SPE_ABI
27501 && info->spe_64bit_regs_used != 0
27502 && info->first_gp_reg_save != 32))
27503 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
27505 /* If we are restoring registers out-of-line, we will be using the
27506 "exit" variants of the restore routines, which will reset the
27507 stack for us. But we do need to point updt_reg into the
27508 right place for those routines. */
27509 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
27511 if (frame_off != 0)
27512 return emit_insn (gen_add3_insn (updt_reg_rtx,
27513 frame_reg_rtx, GEN_INT (frame_off)));
27514 else if (REGNO (frame_reg_rtx) != updt_regno)
27515 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
27517 return NULL_RTX;
27520 /* Return the register number used as a pointer by out-of-line
27521 save/restore functions. */
27523 static inline unsigned
27524 ptr_regno_for_savres (int sel)
27526 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27527 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
27528 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
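/* Summarized (editorial): AIX and ELFv2 use r1 as the pointer for FPR
   routines and for any LR variant, r12 otherwise; Darwin uses r1 for
   FPR routines; all remaining cases use r11.  */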
27531 /* Construct a parallel rtx describing the effect of a call to an
27532 out-of-line register save/restore routine, and emit the insn
27533 or jump_insn as appropriate. */
27535 static rtx_insn *
27536 rs6000_emit_savres_rtx (rs6000_stack_t *info,
27537 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
27538 machine_mode reg_mode, int sel)
27540 int i;
27541 int offset, start_reg, end_reg, n_regs, use_reg;
27542 int reg_size = GET_MODE_SIZE (reg_mode);
27543 rtx sym;
27544 rtvec p;
27545 rtx par;
27546 rtx_insn *insn;
27548 offset = 0;
27549 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27550 ? info->first_gp_reg_save
27551 : (sel & SAVRES_REG) == SAVRES_FPR
27552 ? info->first_fp_reg_save
27553 : (sel & SAVRES_REG) == SAVRES_VR
27554 ? info->first_altivec_reg_save
27555 : -1);
27556 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27557 ? 32
27558 : (sel & SAVRES_REG) == SAVRES_FPR
27559 ? 64
27560 : (sel & SAVRES_REG) == SAVRES_VR
27561 ? LAST_ALTIVEC_REGNO + 1
27562 : -1);
27563 n_regs = end_reg - start_reg;
27564 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
27565 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
27566 + n_regs);
27568 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27569 RTVEC_ELT (p, offset++) = ret_rtx;
27571 RTVEC_ELT (p, offset++)
27572 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27574 sym = rs6000_savres_routine_sym (info, sel);
27575 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
27577 use_reg = ptr_regno_for_savres (sel);
27578 if ((sel & SAVRES_REG) == SAVRES_VR)
27580 /* Vector regs are saved/restored using [reg+reg] addressing. */
27581 RTVEC_ELT (p, offset++)
27582 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27583 RTVEC_ELT (p, offset++)
27584 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
27586 else
27587 RTVEC_ELT (p, offset++)
27588 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27590 for (i = 0; i < end_reg - start_reg; i++)
27591 RTVEC_ELT (p, i + offset)
27592 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
27593 frame_reg_rtx, save_area_offset + reg_size * i,
27594 (sel & SAVRES_SAVE) != 0);
27596 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27597 RTVEC_ELT (p, i + offset)
27598 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
27600 par = gen_rtx_PARALLEL (VOIDmode, p);
27602 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27604 insn = emit_jump_insn (par);
27605 JUMP_LABEL (insn) = ret_rtx;
27607 else
27608 insn = emit_insn (par);
27609 return insn;
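/* Sketch of the PARALLEL built above for a GPR save with LR (editorial,
   hedged; the element order follows the code):

     (parallel [(clobber (reg:P LR_REGNO))
                (use (symbol_ref "_savegpr0_29"))
                (use (reg:P 1))                ; pointer register
                (set (mem ...) (reg 29))       ; one SET per register
                ...
                (set (mem ...) (reg:P 0))])    ; LR spill via r0

   For a restore with LR, a (return) element comes first and the insn
   is emitted as a jump.  */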
27612 /* Emit code to copy into REG the CR fields that need to be saved. */
27614 static void
27615 rs6000_emit_move_from_cr (rtx reg)
27617 /* Only the ELFv2 ABI allows storing only selected fields. */
27618 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
27620 int i, cr_reg[8], count = 0;
27622 /* Collect CR fields that must be saved. */
27623 for (i = 0; i < 8; i++)
27624 if (save_reg_p (CR0_REGNO + i))
27625 cr_reg[count++] = i;
27627 /* If it's just a single field, use mfcrf. */
27628 if (count == 1)
27630 rtvec p = rtvec_alloc (1);
27631 rtvec r = rtvec_alloc (2);
27632 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
27633 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
27634 RTVEC_ELT (p, 0)
27635 = gen_rtx_SET (reg,
27636 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
27638 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27639 return;
27642 /* ??? It might be better to handle count == 2 / 3 cases here
27643 as well, using logical operations to combine the values. */
27646 emit_insn (gen_movesi_from_cr (reg));
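/* For example (editorial, hedged): if only CR2 must be saved, the
   single-field path above typically assembles to "mfocrf rN,0x20",
   since 1 << (7 - 2) == 0x20 selects field 2, rather than copying the
   whole CR with "mfcr rN".  */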
27649 /* Return whether the split-stack arg pointer (r12) is used. */
27651 static bool
27652 split_stack_arg_pointer_used_p (void)
27654 /* If the pseudo holding the arg pointer is no longer a pseudo,
27655 then the arg pointer is used. */
27656 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
27657 && (!REG_P (cfun->machine->split_stack_arg_pointer)
27658 || (REGNO (cfun->machine->split_stack_arg_pointer)
27659 < FIRST_PSEUDO_REGISTER)))
27660 return true;
27662 /* Unfortunately we also need to do some code scanning, since
27663 r12 may have been substituted for the pseudo. */
27664 rtx_insn *insn;
27665 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
27666 FOR_BB_INSNS (bb, insn)
27667 if (NONDEBUG_INSN_P (insn))
27669 /* A call destroys r12. */
27670 if (CALL_P (insn))
27671 return false;
27673 df_ref use;
27674 FOR_EACH_INSN_USE (use, insn)
27676 rtx x = DF_REF_REG (use);
27677 if (REG_P (x) && REGNO (x) == 12)
27678 return true;
27680 df_ref def;
27681 FOR_EACH_INSN_DEF (def, insn)
27683 rtx x = DF_REF_REG (def);
27684 if (REG_P (x) && REGNO (x) == 12)
27685 return false;
27688 return bitmap_bit_p (DF_LR_OUT (bb), 12);
27691 /* Return whether we need to emit an ELFv2 global entry point prologue. */
27693 static bool
27694 rs6000_global_entry_point_needed_p (void)
27696 /* Only needed for the ELFv2 ABI. */
27697 if (DEFAULT_ABI != ABI_ELFv2)
27698 return false;
27700 /* With -msingle-pic-base, we assume the whole program shares the same
27701 TOC, so no global entry point prologues are needed anywhere. */
27702 if (TARGET_SINGLE_PIC_BASE)
27703 return false;
27705 /* Ensure we have a global entry point for thunks. ??? We could
27706 avoid that if the target routine doesn't need a global entry point,
27707 but we do not know whether this is the case at this point. */
27708 if (cfun->is_thunk)
27709 return true;
27711 /* For regular functions, rs6000_emit_prologue sets this flag if the
27712 routine ever uses the TOC pointer. */
27713 return cfun->machine->r2_setup_needed;
27716 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
27717 static sbitmap
27718 rs6000_get_separate_components (void)
27720 rs6000_stack_t *info = rs6000_stack_info ();
27722 if (WORLD_SAVE_P (info))
27723 return NULL;
27725 if (TARGET_SPE_ABI)
27726 return NULL;
27728 sbitmap components = sbitmap_alloc (32);
27729 bitmap_clear (components);
27731 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
27732 && !(info->savres_strategy & REST_MULTIPLE));
27734 /* The GPRs we need saved to the frame. */
27735 if ((info->savres_strategy & SAVE_INLINE_GPRS)
27736 && (info->savres_strategy & REST_INLINE_GPRS))
27738 int reg_size = TARGET_32BIT ? 4 : 8;
27739 int offset = info->gp_save_offset;
27740 if (info->push_p)
27741 offset += info->total_size;
27743 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27745 if (IN_RANGE (offset, -0x8000, 0x7fff)
27746 && rs6000_reg_live_or_pic_offset_p (regno))
27747 bitmap_set_bit (components, regno);
27749 offset += reg_size;
27753 /* Don't mess with the hard frame pointer. */
27754 if (frame_pointer_needed)
27755 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
27757 /* Don't mess with the fixed TOC register. */
27758 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
27759 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27760 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27761 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
27763 /* Optimize LR save and restore if we can. This is component 0. Any
27764 out-of-line register save/restore routines need LR. */
27765 if (info->lr_save_p
27766 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27767 && (info->savres_strategy & SAVE_INLINE_GPRS)
27768 && (info->savres_strategy & REST_INLINE_GPRS)
27769 && (info->savres_strategy & SAVE_INLINE_FPRS)
27770 && (info->savres_strategy & REST_INLINE_FPRS)
27771 && (info->savres_strategy & SAVE_INLINE_VRS)
27772 && (info->savres_strategy & REST_INLINE_VRS))
27774 int offset = info->lr_save_offset;
27775 if (info->push_p)
27776 offset += info->total_size;
27777 if (IN_RANGE (offset, -0x8000, 0x7fff))
27778 bitmap_set_bit (components, 0);
27781 return components;
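/* Editorial summary of the component numbering used by these hooks:
   bit 0 stands for the LR save/restore pair, and bit N, for N in
   [first_gp_reg_save, 31], for the frame save of GPR N.  Bit 0 is free
   to stand for LR because GPR0 is never separately wrapped here.  */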
27784 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
27785 static sbitmap
27786 rs6000_components_for_bb (basic_block bb)
27788 rs6000_stack_t *info = rs6000_stack_info ();
27790 bitmap in = DF_LIVE_IN (bb);
27791 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
27792 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
27794 sbitmap components = sbitmap_alloc (32);
27795 bitmap_clear (components);
27797 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
27798 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27799 if (bitmap_bit_p (in, regno)
27800 || bitmap_bit_p (gen, regno)
27801 || bitmap_bit_p (kill, regno))
27802 bitmap_set_bit (components, regno);
27804 /* LR needs to be saved around a bb if it is killed in that bb. */
27805 if (bitmap_bit_p (gen, LR_REGNO)
27806 || bitmap_bit_p (kill, LR_REGNO))
27807 bitmap_set_bit (components, 0);
27809 return components;
27812 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
27813 static void
27814 rs6000_disqualify_components (sbitmap components, edge e,
27815 sbitmap edge_components, bool /*is_prologue*/)
27817 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
27818 live where we want to place that code. */
27819 if (bitmap_bit_p (edge_components, 0)
27820 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
27822 if (dump_file)
27823 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
27824 "on entry to bb %d\n", e->dest->index);
27825 bitmap_clear_bit (components, 0);
27829 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
27830 static void
27831 rs6000_emit_prologue_components (sbitmap components)
27833 rs6000_stack_t *info = rs6000_stack_info ();
27834 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27835 ? HARD_FRAME_POINTER_REGNUM
27836 : STACK_POINTER_REGNUM);
27837 int reg_size = TARGET_32BIT ? 4 : 8;
27839 /* Prologue for LR. */
27840 if (bitmap_bit_p (components, 0))
27842 rtx reg = gen_rtx_REG (Pmode, 0);
27843 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27844 RTX_FRAME_RELATED_P (insn) = 1;
27845 add_reg_note (insn, REG_CFA_REGISTER, NULL);
27847 int offset = info->lr_save_offset;
27848 if (info->push_p)
27849 offset += info->total_size;
27851 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27852 RTX_FRAME_RELATED_P (insn) = 1;
27853 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27854 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
27855 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
27858 /* Prologue for the GPRs. */
27859 int offset = info->gp_save_offset;
27860 if (info->push_p)
27861 offset += info->total_size;
27863 for (int i = info->first_gp_reg_save; i < 32; i++)
27865 if (bitmap_bit_p (components, i))
27867 rtx reg = gen_rtx_REG (Pmode, i);
27868 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27869 RTX_FRAME_RELATED_P (insn) = 1;
27870 rtx set = copy_rtx (single_set (insn));
27871 add_reg_note (insn, REG_CFA_OFFSET, set);
27874 offset += reg_size;
27878 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
27879 static void
27880 rs6000_emit_epilogue_components (sbitmap components)
27882 rs6000_stack_t *info = rs6000_stack_info ();
27883 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27884 ? HARD_FRAME_POINTER_REGNUM
27885 : STACK_POINTER_REGNUM);
27886 int reg_size = TARGET_32BIT ? 4 : 8;
27888 /* Epilogue for the GPRs. */
27889 int offset = info->gp_save_offset;
27890 if (info->push_p)
27891 offset += info->total_size;
27893 for (int i = info->first_gp_reg_save; i < 32; i++)
27895 if (bitmap_bit_p (components, i))
27897 rtx reg = gen_rtx_REG (Pmode, i);
27898 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27899 RTX_FRAME_RELATED_P (insn) = 1;
27900 add_reg_note (insn, REG_CFA_RESTORE, reg);
27903 offset += reg_size;
27906 /* Epilogue for LR. */
27907 if (bitmap_bit_p (components, 0))
27909 int offset = info->lr_save_offset;
27910 if (info->push_p)
27911 offset += info->total_size;
27913 rtx reg = gen_rtx_REG (Pmode, 0);
27914 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27916 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27917 insn = emit_move_insn (lr, reg);
27918 RTX_FRAME_RELATED_P (insn) = 1;
27919 add_reg_note (insn, REG_CFA_RESTORE, lr);
27923 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
27924 static void
27925 rs6000_set_handled_components (sbitmap components)
27927 rs6000_stack_t *info = rs6000_stack_info ();
27929 for (int i = info->first_gp_reg_save; i < 32; i++)
27930 if (bitmap_bit_p (components, i))
27931 cfun->machine->gpr_is_wrapped_separately[i] = true;
27933 if (bitmap_bit_p (components, 0))
27934 cfun->machine->lr_is_wrapped_separately = true;
27937 /* Emit function prologue as insns. */
27939 void
27940 rs6000_emit_prologue (void)
27942 rs6000_stack_t *info = rs6000_stack_info ();
27943 machine_mode reg_mode = Pmode;
27944 int reg_size = TARGET_32BIT ? 4 : 8;
27945 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27946 rtx frame_reg_rtx = sp_reg_rtx;
27947 unsigned int cr_save_regno;
27948 rtx cr_save_rtx = NULL_RTX;
27949 rtx_insn *insn;
27950 int strategy;
27951 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27952 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27953 && call_used_regs[STATIC_CHAIN_REGNUM]);
27954 int using_split_stack = (flag_split_stack
27955 && (lookup_attribute ("no_split_stack",
27956 DECL_ATTRIBUTES (cfun->decl))
27957 == NULL));
27959 /* Offset to top of frame for frame_reg and sp respectively. */
27960 HOST_WIDE_INT frame_off = 0;
27961 HOST_WIDE_INT sp_off = 0;
27962 /* sp_adjust is the stack adjusting instruction, tracked so that the
27963 insn setting up the split-stack arg pointer can be emitted just
27964 prior to it, when r12 is not used here for other purposes. */
27965 rtx_insn *sp_adjust = 0;
27967 #if CHECKING_P
27968 /* Track and check usage of r0, r11, r12. */
27969 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27970 #define START_USE(R) do \
27972 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27973 reg_inuse |= 1 << (R); \
27974 } while (0)
27975 #define END_USE(R) do \
27977 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27978 reg_inuse &= ~(1 << (R)); \
27979 } while (0)
27980 #define NOT_INUSE(R) do \
27982 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27983 } while (0)
27984 #else
27985 #define START_USE(R) do {} while (0)
27986 #define END_USE(R) do {} while (0)
27987 #define NOT_INUSE(R) do {} while (0)
27988 #endif
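  /* Usage sketch for the checking macros above (editorial):

       START_USE (0);     -- claim r0
       ...emit insns using r0...
       END_USE (0);       -- release r0
       NOT_INUSE (11);    -- assert r11 is currently free

     With CHECKING_P these assert on clashing register uses while GCC
     itself runs; otherwise they expand to nothing.  */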
27990 if (DEFAULT_ABI == ABI_ELFv2
27991 && !TARGET_SINGLE_PIC_BASE)
27993 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27995 /* With -mminimal-toc we may generate an extra use of r2 below. */
27996 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27997 cfun->machine->r2_setup_needed = true;
28001 if (flag_stack_usage_info)
28002 current_function_static_stack_size = info->total_size;
28004 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
28006 HOST_WIDE_INT size = info->total_size;
28008 if (crtl->is_leaf && !cfun->calls_alloca)
28010 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
28011 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
28012 size - STACK_CHECK_PROTECT);
28014 else if (size > 0)
28015 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
28018 if (TARGET_FIX_AND_CONTINUE)
28020 /* gdb on darwin arranges to forward a function from the old
28021 address by modifying the first 5 instructions of the function
28022 to branch to the overriding function. This is necessary to
28023 permit function pointers that point to the old function to
28024 actually forward to the new function. */
28025 emit_insn (gen_nop ());
28026 emit_insn (gen_nop ());
28027 emit_insn (gen_nop ());
28028 emit_insn (gen_nop ());
28029 emit_insn (gen_nop ());
28032 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
28034 reg_mode = V2SImode;
28035 reg_size = 8;
28038 /* Handle world saves specially here. */
28039 if (WORLD_SAVE_P (info))
28041 int i, j, sz;
28042 rtx treg;
28043 rtvec p;
28044 rtx reg0;
28046 /* save_world expects lr in r0. */
28047 reg0 = gen_rtx_REG (Pmode, 0);
28048 if (info->lr_save_p)
28050 insn = emit_move_insn (reg0,
28051 gen_rtx_REG (Pmode, LR_REGNO));
28052 RTX_FRAME_RELATED_P (insn) = 1;
28055 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
28056 assumptions about the offsets of various bits of the stack
28057 frame. */
28058 gcc_assert (info->gp_save_offset == -220
28059 && info->fp_save_offset == -144
28060 && info->lr_save_offset == 8
28061 && info->cr_save_offset == 4
28062 && info->push_p
28063 && info->lr_save_p
28064 && (!crtl->calls_eh_return
28065 || info->ehrd_offset == -432)
28066 && info->vrsave_save_offset == -224
28067 && info->altivec_save_offset == -416);
28069 treg = gen_rtx_REG (SImode, 11);
28070 emit_move_insn (treg, GEN_INT (-info->total_size));
28072 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
28073 in R11. It also clobbers R12, so beware! */
28075 /* Preserve CR2 for save_world prologues */
28076 sz = 5;
28077 sz += 32 - info->first_gp_reg_save;
28078 sz += 64 - info->first_fp_reg_save;
28079 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
28080 p = rtvec_alloc (sz);
28081 j = 0;
28082 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
28083 gen_rtx_REG (SImode,
28084 LR_REGNO));
28085 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
28086 gen_rtx_SYMBOL_REF (Pmode,
28087 "*save_world"));
28088 /* We do floats first so that the instruction pattern matches
28089 properly. */
28090 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28091 RTVEC_ELT (p, j++)
28092 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28093 ? DFmode : SFmode,
28094 info->first_fp_reg_save + i),
28095 frame_reg_rtx,
28096 info->fp_save_offset + frame_off + 8 * i);
28097 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28098 RTVEC_ELT (p, j++)
28099 = gen_frame_store (gen_rtx_REG (V4SImode,
28100 info->first_altivec_reg_save + i),
28101 frame_reg_rtx,
28102 info->altivec_save_offset + frame_off + 16 * i);
28103 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28104 RTVEC_ELT (p, j++)
28105 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28106 frame_reg_rtx,
28107 info->gp_save_offset + frame_off + reg_size * i);
28109 /* CR register traditionally saved as CR2. */
28110 RTVEC_ELT (p, j++)
28111 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
28112 frame_reg_rtx, info->cr_save_offset + frame_off);
28113 /* Record the save of R0, which holds the caller's LR. */
28114 if (info->lr_save_p)
28115 RTVEC_ELT (p, j++)
28116 = gen_frame_store (reg0,
28117 frame_reg_rtx, info->lr_save_offset + frame_off);
28118 /* Explain what happens to the stack pointer. */
28120 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
28121 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
28124 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28125 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28126 treg, GEN_INT (-info->total_size));
28127 sp_off = frame_off = info->total_size;
28130 strategy = info->savres_strategy;
28132 /* For V.4, update stack before we do any saving and set back pointer. */
28133 if (! WORLD_SAVE_P (info)
28134 && info->push_p
28135 && (DEFAULT_ABI == ABI_V4
28136 || crtl->calls_eh_return))
28138 bool need_r11 = (TARGET_SPE
28139 ? (!(strategy & SAVE_INLINE_GPRS)
28140 && info->spe_64bit_regs_used == 0)
28141 : (!(strategy & SAVE_INLINE_FPRS)
28142 || !(strategy & SAVE_INLINE_GPRS)
28143 || !(strategy & SAVE_INLINE_VRS)));
28144 int ptr_regno = -1;
28145 rtx ptr_reg = NULL_RTX;
28146 int ptr_off = 0;
28148 if (info->total_size < 32767)
28149 frame_off = info->total_size;
28150 else if (need_r11)
28151 ptr_regno = 11;
28152 else if (info->cr_save_p
28153 || info->lr_save_p
28154 || info->first_fp_reg_save < 64
28155 || info->first_gp_reg_save < 32
28156 || info->altivec_size != 0
28157 || info->vrsave_size != 0
28158 || crtl->calls_eh_return)
28159 ptr_regno = 12;
28160 else
28162 /* The prologue won't be saving any regs so there is no need
28163 to set up a frame register to access any frame save area.
28164 We also won't be using frame_off anywhere below, but set
28165 the correct value anyway to protect against future
28166 changes to this function. */
28167 frame_off = info->total_size;
28169 if (ptr_regno != -1)
28171 /* Set up the frame offset to that needed by the first
28172 out-of-line save function. */
28173 START_USE (ptr_regno);
28174 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28175 frame_reg_rtx = ptr_reg;
28176 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
28177 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
28178 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
28179 ptr_off = info->gp_save_offset + info->gp_size;
28180 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
28181 ptr_off = info->altivec_save_offset + info->altivec_size;
28182 frame_off = -ptr_off;
28184 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28185 ptr_reg, ptr_off);
28186 if (REGNO (frame_reg_rtx) == 12)
28187 sp_adjust = 0;
28188 sp_off = info->total_size;
28189 if (frame_reg_rtx != sp_reg_rtx)
28190 rs6000_emit_stack_tie (frame_reg_rtx, false);
28193 /* If we use the link register, get it into r0. */
28194 if (!WORLD_SAVE_P (info) && info->lr_save_p
28195 && !cfun->machine->lr_is_wrapped_separately)
28197 rtx addr, reg, mem;
28199 reg = gen_rtx_REG (Pmode, 0);
28200 START_USE (0);
28201 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
28202 RTX_FRAME_RELATED_P (insn) = 1;
28204 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
28205 | SAVE_NOINLINE_FPRS_SAVES_LR)))
28207 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28208 GEN_INT (info->lr_save_offset + frame_off));
28209 mem = gen_rtx_MEM (Pmode, addr);
28210 /* This should not be of rs6000_sr_alias_set, because of
28211 __builtin_return_address. */
28213 insn = emit_move_insn (mem, reg);
28214 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28215 NULL_RTX, NULL_RTX);
28216 END_USE (0);
28220 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
28221 r12 will be needed by the out-of-line gpr save. */
28222 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28223 && !(strategy & (SAVE_INLINE_GPRS
28224 | SAVE_NOINLINE_GPRS_SAVES_LR))
28225 ? 11 : 12);
28226 if (!WORLD_SAVE_P (info)
28227 && info->cr_save_p
28228 && REGNO (frame_reg_rtx) != cr_save_regno
28229 && !(using_static_chain_p && cr_save_regno == 11)
28230 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
28232 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
28233 START_USE (cr_save_regno);
28234 rs6000_emit_move_from_cr (cr_save_rtx);
28237 /* Do any required saving of FPRs. If the strategy says to save them
28238 inline, do it ourselves. Otherwise, call an out-of-line routine. */
28239 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
28241 int i;
28242 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28243 if (save_reg_p (info->first_fp_reg_save + i))
28244 emit_frame_save (frame_reg_rtx,
28245 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28246 ? DFmode : SFmode),
28247 info->first_fp_reg_save + i,
28248 info->fp_save_offset + frame_off + 8 * i,
28249 sp_off - frame_off);
28251 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
28253 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28254 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28255 unsigned ptr_regno = ptr_regno_for_savres (sel);
28256 rtx ptr_reg = frame_reg_rtx;
28258 if (REGNO (frame_reg_rtx) == ptr_regno)
28259 gcc_checking_assert (frame_off == 0);
28260 else
28262 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28263 NOT_INUSE (ptr_regno);
28264 emit_insn (gen_add3_insn (ptr_reg,
28265 frame_reg_rtx, GEN_INT (frame_off)));
28267 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28268 info->fp_save_offset,
28269 info->lr_save_offset,
28270 DFmode, sel);
28271 rs6000_frame_related (insn, ptr_reg, sp_off,
28272 NULL_RTX, NULL_RTX);
28273 if (lr)
28274 END_USE (0);
28277 /* Save GPRs. This is done as a PARALLEL if we are using
28278 the store-multiple instructions. */
28279 if (!WORLD_SAVE_P (info)
28280 && TARGET_SPE_ABI
28281 && info->spe_64bit_regs_used != 0
28282 && info->first_gp_reg_save != 32)
28284 int i;
28285 rtx spe_save_area_ptr;
28286 HOST_WIDE_INT save_off;
28287 int ool_adjust = 0;
28289 /* Determine whether we can address all of the registers that need
28290 to be saved with an offset from frame_reg_rtx that fits in
28291 the small const field for SPE memory instructions. */
28292 int spe_regs_addressable
28293 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28294 + reg_size * (32 - info->first_gp_reg_save - 1))
28295 && (strategy & SAVE_INLINE_GPRS));
28297 if (spe_regs_addressable)
28299 spe_save_area_ptr = frame_reg_rtx;
28300 save_off = frame_off;
28302 else
28304 /* Make r11 point to the start of the SPE save area. We need
28305 to be careful here if r11 is holding the static chain. If
28306 it is, then temporarily save it in r0. */
28307 HOST_WIDE_INT offset;
28309 if (!(strategy & SAVE_INLINE_GPRS))
28310 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28311 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
28312 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
28313 save_off = frame_off - offset;
28315 if (using_static_chain_p)
28317 rtx r0 = gen_rtx_REG (Pmode, 0);
28319 START_USE (0);
28320 gcc_assert (info->first_gp_reg_save > 11);
28322 emit_move_insn (r0, spe_save_area_ptr);
28324 else if (REGNO (frame_reg_rtx) != 11)
28325 START_USE (11);
28327 emit_insn (gen_addsi3 (spe_save_area_ptr,
28328 frame_reg_rtx, GEN_INT (offset)));
28329 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
28330 frame_off = -info->spe_gp_save_offset + ool_adjust;
28333 if ((strategy & SAVE_INLINE_GPRS))
28335 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28336 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28337 emit_frame_save (spe_save_area_ptr, reg_mode,
28338 info->first_gp_reg_save + i,
28339 (info->spe_gp_save_offset + save_off
28340 + reg_size * i),
28341 sp_off - save_off);
28343 else
28345 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
28346 info->spe_gp_save_offset + save_off,
28347 0, reg_mode,
28348 SAVRES_SAVE | SAVRES_GPR);
28350 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
28351 NULL_RTX, NULL_RTX);
28354 /* Move the static chain pointer back. */
28355 if (!spe_regs_addressable)
28357 if (using_static_chain_p)
28359 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
28360 END_USE (0);
28362 else if (REGNO (frame_reg_rtx) != 11)
28363 END_USE (11);
28366 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
28368 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
28369 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
28370 unsigned ptr_regno = ptr_regno_for_savres (sel);
28371 rtx ptr_reg = frame_reg_rtx;
28372 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
28373 int end_save = info->gp_save_offset + info->gp_size;
28374 int ptr_off;
28376 if (ptr_regno == 12)
28377 sp_adjust = 0;
28378 if (!ptr_set_up)
28379 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28381 /* Need to adjust r11 (r12) if we saved any FPRs. */
28382 if (end_save + frame_off != 0)
28384 rtx offset = GEN_INT (end_save + frame_off);
28386 if (ptr_set_up)
28387 frame_off = -end_save;
28388 else
28389 NOT_INUSE (ptr_regno);
28390 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28392 else if (!ptr_set_up)
28394 NOT_INUSE (ptr_regno);
28395 emit_move_insn (ptr_reg, frame_reg_rtx);
28397 ptr_off = -end_save;
28398 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28399 info->gp_save_offset + ptr_off,
28400 info->lr_save_offset + ptr_off,
28401 reg_mode, sel);
28402 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
28403 NULL_RTX, NULL_RTX);
28404 if (lr)
28405 END_USE (0);
28407 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
28409 rtvec p;
28410 int i;
28411 p = rtvec_alloc (32 - info->first_gp_reg_save);
28412 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28413 RTVEC_ELT (p, i)
28414 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28415 frame_reg_rtx,
28416 info->gp_save_offset + frame_off + reg_size * i);
28417 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28418 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28419 NULL_RTX, NULL_RTX);
28421 else if (!WORLD_SAVE_P (info))
28423 int offset = info->gp_save_offset + frame_off;
28424 for (int i = info->first_gp_reg_save; i < 32; i++)
28426 if (rs6000_reg_live_or_pic_offset_p (i)
28427 && !cfun->machine->gpr_is_wrapped_separately[i])
28428 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
28429 sp_off - frame_off);
28431 offset += reg_size;
28435 if (crtl->calls_eh_return)
28437 unsigned int i;
28438 rtvec p;
28440 for (i = 0; ; ++i)
28442 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28443 if (regno == INVALID_REGNUM)
28444 break;
28447 p = rtvec_alloc (i);
28449 for (i = 0; ; ++i)
28451 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28452 if (regno == INVALID_REGNUM)
28453 break;
28455 rtx set
28456 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
28457 sp_reg_rtx,
28458 info->ehrd_offset + sp_off + reg_size * (int) i);
28459 RTVEC_ELT (p, i) = set;
28460 RTX_FRAME_RELATED_P (set) = 1;
28463 insn = emit_insn (gen_blockage ());
28464 RTX_FRAME_RELATED_P (insn) = 1;
28465 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
28468 /* In the AIX ABI we need to make sure r2 is really saved. */
28469 if (TARGET_AIX && crtl->calls_eh_return)
28471 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
28472 rtx join_insn, note;
28473 rtx_insn *save_insn;
28474 long toc_restore_insn;
28476 tmp_reg = gen_rtx_REG (Pmode, 11);
28477 tmp_reg_si = gen_rtx_REG (SImode, 11);
28478 if (using_static_chain_p)
28480 START_USE (0);
28481 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
28483 else
28484 START_USE (11);
28485 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
28486 /* Peek at instruction to which this function returns. If it's
28487 restoring r2, then we know we've already saved r2. We can't
28488 unconditionally save r2 because the value we have will already
28489 be updated if we arrived at this function via a plt call or
28490 toc adjusting stub. */
28491 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
28492 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
28493 + RS6000_TOC_SAVE_SLOT);
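      /* Editorial gloss (hedged): 0x80410000 encodes "lwz r2,N(r1)" and
	 0xE8410000 encodes "ld r2,N(r1)", so adding RS6000_TOC_SAVE_SLOT
	 yields the exact TOC-restore instruction a call site would place
	 after the call; the xor/compare below tests for it.  */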
28494 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
28495 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
28496 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
28497 validate_condition_mode (EQ, CCUNSmode);
28498 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
28499 emit_insn (gen_rtx_SET (compare_result,
28500 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
28501 toc_save_done = gen_label_rtx ();
28502 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28503 gen_rtx_EQ (VOIDmode, compare_result,
28504 const0_rtx),
28505 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
28506 pc_rtx);
28507 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28508 JUMP_LABEL (jump) = toc_save_done;
28509 LABEL_NUSES (toc_save_done) += 1;
28511 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
28512 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
28513 sp_off - frame_off);
28515 emit_label (toc_save_done);
28517 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
28518 have a CFG that has different saves along different paths.
28519 Move the note to a dummy blockage insn, which describes that
28520 R2 is unconditionally saved after the label. */
28521 /* ??? An alternate representation might be a special insn pattern
28522 containing both the branch and the store. That might give the
28523 code that minimizes the number of DW_CFA_advance opcodes more
28524 freedom in placing the annotations. */
28525 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
28526 if (note)
28527 remove_note (save_insn, note);
28528 else
28529 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
28530 copy_rtx (PATTERN (save_insn)), NULL_RTX);
28531 RTX_FRAME_RELATED_P (save_insn) = 0;
28533 join_insn = emit_insn (gen_blockage ());
28534 REG_NOTES (join_insn) = note;
28535 RTX_FRAME_RELATED_P (join_insn) = 1;
28537 if (using_static_chain_p)
28539 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
28540 END_USE (0);
28542 else
28543 END_USE (11);
28546 /* Save CR if we use any CR fields that must be preserved. */
28547 if (!WORLD_SAVE_P (info) && info->cr_save_p)
28549 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28550 GEN_INT (info->cr_save_offset + frame_off));
28551 rtx mem = gen_frame_mem (SImode, addr);
28553 /* If we didn't copy cr before, do so now using r0. */
28554 if (cr_save_rtx == NULL_RTX)
28556 START_USE (0);
28557 cr_save_rtx = gen_rtx_REG (SImode, 0);
28558 rs6000_emit_move_from_cr (cr_save_rtx);
28561 /* Saving CR requires a two-instruction sequence: one instruction
28562 to move the CR to a general-purpose register, and a second
28563 instruction that stores the GPR to memory.
28565 We do not emit any DWARF CFI records for the first of these,
28566 because we cannot properly represent the fact that CR is saved in
28567 a register. One reason is that we cannot express that multiple
28568 CR fields are saved; another reason is that on 64-bit, the size
28569 of the CR register in DWARF (4 bytes) differs from the size of
28570 a general-purpose register.
28572 This means if any intervening instruction were to clobber one of
28573 the call-saved CR fields, we'd have incorrect CFI. To prevent
28574 this from happening, we mark the store to memory as a use of
28575 those CR fields, which prevents any such instruction from being
28576 scheduled in between the two instructions. */
28577 rtx crsave_v[9];
28578 int n_crsave = 0;
28579 int i;
28581 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
28582 for (i = 0; i < 8; i++)
28583 if (save_reg_p (CR0_REGNO + i))
28584 crsave_v[n_crsave++]
28585 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28587 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
28588 gen_rtvec_v (n_crsave, crsave_v)));
28589 END_USE (REGNO (cr_save_rtx));
28591 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
28592 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
28593 so we need to construct a frame expression manually. */
28594 RTX_FRAME_RELATED_P (insn) = 1;
28596 /* Update address to be stack-pointer relative, like
28597 rs6000_frame_related would do. */
28598 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28599 GEN_INT (info->cr_save_offset + sp_off));
28600 mem = gen_frame_mem (SImode, addr);
28602 if (DEFAULT_ABI == ABI_ELFv2)
28604 /* In the ELFv2 ABI we generate separate CFI records for each
28605 CR field that was actually saved. They all point to the
28606 same 32-bit stack slot. */
28607 rtx crframe[8];
28608 int n_crframe = 0;
28610 for (i = 0; i < 8; i++)
28611 if (save_reg_p (CR0_REGNO + i))
28613 crframe[n_crframe]
28614 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
28616 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
28617 n_crframe++;
28620 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28621 gen_rtx_PARALLEL (VOIDmode,
28622 gen_rtvec_v (n_crframe, crframe)));
28624 else
28626 /* In other ABIs, by convention, we use a single CR regnum to
28627 represent the fact that all call-saved CR fields are saved.
28628 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
28629 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
28630 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
28634 /* In the ELFv2 ABI we need to save all call-saved CR fields into
28635 *separate* slots if the routine calls __builtin_eh_return, so
28636 that they can be independently restored by the unwinder. */
28637 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28639 int i, cr_off = info->ehcr_offset;
28640 rtx crsave;
28642 /* ??? We might get better performance by using multiple mfocrf
28643 instructions. */
28644 crsave = gen_rtx_REG (SImode, 0);
28645 emit_insn (gen_movesi_from_cr (crsave));
28647 for (i = 0; i < 8; i++)
28648 if (!call_used_regs[CR0_REGNO + i])
28650 rtvec p = rtvec_alloc (2);
28651 RTVEC_ELT (p, 0)
28652 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
28653 RTVEC_ELT (p, 1)
28654 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28656 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28658 RTX_FRAME_RELATED_P (insn) = 1;
28659 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28660 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
28661 sp_reg_rtx, cr_off + sp_off));
28663 cr_off += reg_size;
28667 /* Update stack and set back pointer unless this is V.4,
28668 for which it was done previously. */
28669 if (!WORLD_SAVE_P (info) && info->push_p
28670 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
28672 rtx ptr_reg = NULL;
28673 int ptr_off = 0;
28675 /* If saving AltiVec regs we need to be able to address all save
28676 locations using a 16-bit offset. */
28677 if ((strategy & SAVE_INLINE_VRS) == 0
28678 || (info->altivec_size != 0
28679 && (info->altivec_save_offset + info->altivec_size - 16
28680 + info->total_size - frame_off) > 32767)
28681 || (info->vrsave_size != 0
28682 && (info->vrsave_save_offset
28683 + info->total_size - frame_off) > 32767))
28685 int sel = SAVRES_SAVE | SAVRES_VR;
28686 unsigned ptr_regno = ptr_regno_for_savres (sel);
28688 if (using_static_chain_p
28689 && ptr_regno == STATIC_CHAIN_REGNUM)
28690 ptr_regno = 12;
28691 if (REGNO (frame_reg_rtx) != ptr_regno)
28692 START_USE (ptr_regno);
28693 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28694 frame_reg_rtx = ptr_reg;
28695 ptr_off = info->altivec_save_offset + info->altivec_size;
28696 frame_off = -ptr_off;
28698 else if (REGNO (frame_reg_rtx) == 1)
28699 frame_off = info->total_size;
28700 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28701 ptr_reg, ptr_off);
28702 if (REGNO (frame_reg_rtx) == 12)
28703 sp_adjust = 0;
28704 sp_off = info->total_size;
28705 if (frame_reg_rtx != sp_reg_rtx)
28706 rs6000_emit_stack_tie (frame_reg_rtx, false);
28709 /* Set frame pointer, if needed. */
28710 if (frame_pointer_needed)
28712 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
28713 sp_reg_rtx);
28714 RTX_FRAME_RELATED_P (insn) = 1;
28717 /* Save AltiVec registers if needed. Save here because the red zone does
28718 not always include AltiVec registers. */
28719 if (!WORLD_SAVE_P (info)
28720 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
28722 int end_save = info->altivec_save_offset + info->altivec_size;
28723 int ptr_off;
28724 /* Oddly, the vector save/restore functions point r0 at the end
28725 of the save area, then use r11 or r12 to load offsets for
28726 [reg+reg] addressing. */
28727 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28728 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
28729 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28731 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28732 NOT_INUSE (0);
28733 if (scratch_regno == 12)
28734 sp_adjust = 0;
28735 if (end_save + frame_off != 0)
28737 rtx offset = GEN_INT (end_save + frame_off);
28739 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28741 else
28742 emit_move_insn (ptr_reg, frame_reg_rtx);
28744 ptr_off = -end_save;
28745 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28746 info->altivec_save_offset + ptr_off,
28747 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
28748 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
28749 NULL_RTX, NULL_RTX);
28750 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28752 /* The oddity mentioned above clobbered our frame reg. */
28753 emit_move_insn (frame_reg_rtx, ptr_reg);
28754 frame_off = ptr_off;
28757 else if (!WORLD_SAVE_P (info)
28758 && info->altivec_size != 0)
28760 int i;
28762 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28763 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28765 rtx areg, savereg, mem;
28766 HOST_WIDE_INT offset;
28768 offset = (info->altivec_save_offset + frame_off
28769 + 16 * (i - info->first_altivec_reg_save));
28771 savereg = gen_rtx_REG (V4SImode, i);
28773 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28775 mem = gen_frame_mem (V4SImode,
28776 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28777 GEN_INT (offset)));
28778 insn = emit_insn (gen_rtx_SET (mem, savereg));
28779 areg = NULL_RTX;
28781 else
28783 NOT_INUSE (0);
28784 areg = gen_rtx_REG (Pmode, 0);
28785 emit_move_insn (areg, GEN_INT (offset));
28787 /* AltiVec addressing mode is [reg+reg]. */
28788 mem = gen_frame_mem (V4SImode,
28789 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
28791 /* Rather than emitting a generic move, force use of the stvx
28792 instruction, which we always want on ISA 2.07 (power8) systems.
28793 In particular we don't want xxpermdi/stxvd2x for little
28794 endian. */
28795 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
28798 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28799 areg, GEN_INT (offset));
28803 /* VRSAVE is a bit vector representing which AltiVec registers
28804 are used. The OS uses this to determine which vector
28805 registers to save on a context switch. We need to save
28806 VRSAVE on the stack frame, add whatever AltiVec registers we
28807 used in this function, and do the corresponding magic in the
28808 epilogue. */
28810 if (!WORLD_SAVE_P (info)
28811 && info->vrsave_size != 0)
28813 rtx reg, vrsave;
28814 int offset;
28815 int save_regno;
28817 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
28818 be using r12 as frame_reg_rtx and r11 as the static chain
28819 pointer for nested functions. */
28820 save_regno = 12;
28821 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28822 && !using_static_chain_p)
28823 save_regno = 11;
28824 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
28826 save_regno = 11;
28827 if (using_static_chain_p)
28828 save_regno = 0;
28831 NOT_INUSE (save_regno);
28832 reg = gen_rtx_REG (SImode, save_regno);
28833 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28834 if (TARGET_MACHO)
28835 emit_insn (gen_get_vrsave_internal (reg));
28836 else
28837 emit_insn (gen_rtx_SET (reg, vrsave));
28839 /* Save VRSAVE. */
28840 offset = info->vrsave_save_offset + frame_off;
28841 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
28843 /* Include the registers in the mask. */
28844 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
28846 insn = emit_insn (generate_set_vrsave (reg, info, 0));
28849 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
28850 if (!TARGET_SINGLE_PIC_BASE
28851 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
28852 || (DEFAULT_ABI == ABI_V4
28853 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
28854 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
28856 /* If emit_load_toc_table will use the link register, we need to save
28857 it. We use R12 for this purpose because emit_load_toc_table
28858 can use register 0. This allows us to use a plain 'blr' to return
28859 from the procedure more often. */
28860 int save_LR_around_toc_setup = (TARGET_ELF
28861 && DEFAULT_ABI == ABI_V4
28862 && flag_pic
28863 && ! info->lr_save_p
28864 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
28865 if (save_LR_around_toc_setup)
28867 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28868 rtx tmp = gen_rtx_REG (Pmode, 12);
28870 sp_adjust = 0;
28871 insn = emit_move_insn (tmp, lr);
28872 RTX_FRAME_RELATED_P (insn) = 1;
28874 rs6000_emit_load_toc_table (TRUE);
28876 insn = emit_move_insn (lr, tmp);
28877 add_reg_note (insn, REG_CFA_RESTORE, lr);
28878 RTX_FRAME_RELATED_P (insn) = 1;
28880 else
28881 rs6000_emit_load_toc_table (TRUE);
28884 #if TARGET_MACHO
28885 if (!TARGET_SINGLE_PIC_BASE
28886 && DEFAULT_ABI == ABI_DARWIN
28887 && flag_pic && crtl->uses_pic_offset_table)
28889 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28890 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
28892 /* Save and restore LR locally around this call (in R0). */
28893 if (!info->lr_save_p)
28894 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
28896 emit_insn (gen_load_macho_picbase (src));
28898 emit_move_insn (gen_rtx_REG (Pmode,
28899 RS6000_PIC_OFFSET_TABLE_REGNUM),
28900 lr);
28902 if (!info->lr_save_p)
28903 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
28905 #endif
28907 /* If we need to, save the TOC register after doing the stack setup.
28908 Do not emit eh frame info for this save. The unwinder wants info,
28909 conceptually attached to instructions in this function, about
28910 register values in the caller of this function. This R2 may have
28911 already been changed from the value in the caller.
28912 We don't attempt to write accurate DWARF EH frame info for R2
28913 because code emitted by gcc for a (non-pointer) function call
28914 doesn't save and restore R2. Instead, R2 is managed out-of-line
28915 by a linker generated plt call stub when the function resides in
28916 a shared library. This behavior is costly to describe in DWARF,
28917 both in terms of the size of DWARF info and the time taken in the
28918 unwinder to interpret it. R2 changes, apart from the
28919 calls_eh_return case earlier in this function, are handled by
28920 linux-unwind.h frob_update_context. */
28921 if (rs6000_save_toc_in_prologue_p ())
28923 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
28924 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
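  /* Editorial note (hedged): RS6000_TOC_SAVE_SLOT is the ABI-defined TOC
     save word in the linkage area -- conventionally r1+40 for 64-bit
     AIX/ELFv1, r1+24 for ELFv2, and r1+20 for 32-bit AIX.  */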
28927 if (using_split_stack && split_stack_arg_pointer_used_p ())
28929 /* Set up the arg pointer (r12) for -fsplit-stack code. If
28930 __morestack was called, it left the arg pointer to the old
28931 stack in r29. Otherwise, the arg pointer is the top of the
28932 current frame. */
28933 cfun->machine->split_stack_argp_used = true;
28934 if (sp_adjust)
28936 rtx r12 = gen_rtx_REG (Pmode, 12);
28937 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
28938 emit_insn_before (set_r12, sp_adjust);
28940 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
28942 rtx r12 = gen_rtx_REG (Pmode, 12);
28943 if (frame_off == 0)
28944 emit_move_insn (r12, frame_reg_rtx);
28945 else
28946 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
28948 if (info->push_p)
28950 rtx r12 = gen_rtx_REG (Pmode, 12);
28951 rtx r29 = gen_rtx_REG (Pmode, 29);
28952 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28953 rtx not_more = gen_label_rtx ();
28954 rtx jump;
28956 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28957 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
28958 gen_rtx_LABEL_REF (VOIDmode, not_more),
28959 pc_rtx);
28960 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28961 JUMP_LABEL (jump) = not_more;
28962 LABEL_NUSES (not_more) += 1;
28963 emit_move_insn (r12, r29);
28964 emit_label (not_more);
28969 /* Output .extern statements for the save/restore routines we use. */
28971 static void
28972 rs6000_output_savres_externs (FILE *file)
28974 rs6000_stack_t *info = rs6000_stack_info ();
28976 if (TARGET_DEBUG_STACK)
28977 debug_stack_info (info);
28979 /* Write .extern for any function we will call to save and restore
28980 fp values. */
28981 if (info->first_fp_reg_save < 64
28982 && !TARGET_MACHO
28983 && !TARGET_ELF)
28985 char *name;
28986 int regno = info->first_fp_reg_save - 32;
28988 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
28990 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28991 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28992 name = rs6000_savres_routine_name (info, regno, sel);
28993 fprintf (file, "\t.extern %s\n", name);
28995 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
28997 bool lr = (info->savres_strategy
28998 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28999 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29000 name = rs6000_savres_routine_name (info, regno, sel);
29001 fprintf (file, "\t.extern %s\n", name);
29006 /* Write function prologue. */
29008 static void
29009 rs6000_output_function_prologue (FILE *file,
29010 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
29012 if (!cfun->is_thunk)
29013 rs6000_output_savres_externs (file);
29015 /* ELFv2 ABI r2 setup code and local entry point. This must follow
29016 immediately after the global entry point label. */
29017 if (rs6000_global_entry_point_needed_p ())
29019 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29021 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
29023 if (TARGET_CMODEL != CMODEL_LARGE)
29025 /* In the small and medium code models, we assume the TOC is less
29026 than 2 GB away from the text section, so it can be computed via the
29027 following two-instruction sequence. */
29028 char buf[256];
29030 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29031 fprintf (file, "0:\taddis 2,12,.TOC.-");
29032 assemble_name (file, buf);
29033 fprintf (file, "@ha\n");
29034 fprintf (file, "\taddi 2,2,.TOC.-");
29035 assemble_name (file, buf);
29036 fprintf (file, "@l\n");
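	  /* For rs6000_pic_labelno == 0 on ELF the code above prints, e.g.:
	     .LCF0:
	     0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l
	     (the exact label spelling comes from
	     ASM_GENERATE_INTERNAL_LABEL).  */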
29038 else
29040 /* In the large code model, we allow arbitrary offsets between the
29041 TOC and the text section, so we have to load the offset from
29042 memory. The data field is emitted directly before the global
29043 entry point in rs6000_elf_declare_function_name. */
29044 char buf[256];
29046 #ifdef HAVE_AS_ENTRY_MARKERS
29047 /* If supported by the linker, emit a marker relocation. If the
29048 total code size of the final executable or shared library
29049 happens to fit into 2 GB after all, the linker will replace
29050 this code sequence with the sequence for the small or medium
29051 code model. */
29052 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
29053 #endif
29054 fprintf (file, "\tld 2,");
29055 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
29056 assemble_name (file, buf);
29057 fprintf (file, "-");
29058 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29059 assemble_name (file, buf);
29060 fprintf (file, "(12)\n");
29061 fprintf (file, "\tadd 2,2,12\n");
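	  /* Likewise, the large-model sequence comes out as, e.g.:
		.reloc .,R_PPC64_ENTRY
		ld 2,.LCL0-.LCF0(12)
		add 2,2,12						  */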
29064 fputs ("\t.localentry\t", file);
29065 assemble_name (file, name);
29066 fputs (",.-", file);
29067 assemble_name (file, name);
29068 fputs ("\n", file);
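      /* The directive emitted here, ".localentry name,.-name", records
	 the distance from the global entry point to this point; the
	 ELFv2 linker and runtime use it to find the local entry point.  */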
29071 /* Output -mprofile-kernel code. This needs to be done here instead of
29072 in output_function_profiler since it must go after the ELFv2 ABI
29073 local entry point. */
29074 if (TARGET_PROFILE_KERNEL && crtl->profile)
29076 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
29077 gcc_assert (!TARGET_32BIT);
29079 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
29081 /* In the ELFv2 ABI we have no compiler stack word. It must be
29082 the responsibility of _mcount to preserve the static chain
29083 register if required. */
29084 if (DEFAULT_ABI != ABI_ELFv2
29085 && cfun->static_chain_decl != NULL)
29087 asm_fprintf (file, "\tstd %s,24(%s)\n",
29088 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29089 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
29090 asm_fprintf (file, "\tld %s,24(%s)\n",
29091 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29093 else
29094 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
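      /* A sketch of the result for the static-chain case, assuming the
	 usual STATIC_CHAIN_REGNUM of r11 and RS6000_MCOUNT of "_mcount":
		mflr 0
		std 11,24(1)
		bl _mcount
		ld 11,24(1)						  */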
29097 rs6000_pic_labelno++;
29100 /* -mprofile-kernel code calls mcount before the function prologue,
29101 so a profiled leaf function should stay a leaf function. */
29102 static bool
29103 rs6000_keep_leaf_when_profiled ()
29105 return TARGET_PROFILE_KERNEL;
29108 /* Non-zero if vmx regs are restored before the frame pop, zero if
29109 we restore after the pop when possible. */
29110 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
29112 /* Restoring cr is a two step process: loading a reg from the frame
29113 save, then moving the reg to cr. For ABI_V4 we must let the
29114 unwinder know that the stack location is no longer valid at or
29115 before the stack deallocation, but we can't emit a cfa_restore for
29116 cr at the stack deallocation like we do for other registers.
29117 The trouble is that it is possible for the move to cr to be
29118 scheduled after the stack deallocation. So say exactly where cr
29119 is located on each of the two insns. */
29121 static rtx
29122 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
29124 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
29125 rtx reg = gen_rtx_REG (SImode, regno);
29126 rtx_insn *insn = emit_move_insn (reg, mem);
29128 if (!exit_func && DEFAULT_ABI == ABI_V4)
29130 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29131 rtx set = gen_rtx_SET (reg, cr);
29133 add_reg_note (insn, REG_CFA_REGISTER, set);
29134 RTX_FRAME_RELATED_P (insn) = 1;
29136 return reg;
29139 /* Reload CR from REG. */
29141 static void
29142 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
29144 int count = 0;
29145 int i;
29147 if (using_mfcr_multiple)
29149 for (i = 0; i < 8; i++)
29150 if (save_reg_p (CR0_REGNO + i))
29151 count++;
29152 gcc_assert (count);
29155 if (using_mfcr_multiple && count > 1)
29157 rtx_insn *insn;
29158 rtvec p;
29159 int ndx;
29161 p = rtvec_alloc (count);
29163 ndx = 0;
29164 for (i = 0; i < 8; i++)
29165 if (save_reg_p (CR0_REGNO + i))
29167 rtvec r = rtvec_alloc (2);
29168 RTVEC_ELT (r, 0) = reg;
29169 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
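	    /* 1 << (7-i) is the mtcrf-style field mask: CR0 corresponds
	       to the most significant mask bit, so e.g. CR2 is selected
	       by mask 0x20.  */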
29170 RTVEC_ELT (p, ndx) =
29171 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
29172 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
29173 ndx++;
29175 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29176 gcc_assert (ndx == count);
29178 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29179 CR field separately. */
29180 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29182 for (i = 0; i < 8; i++)
29183 if (save_reg_p (CR0_REGNO + i))
29184 add_reg_note (insn, REG_CFA_RESTORE,
29185 gen_rtx_REG (SImode, CR0_REGNO + i));
29187 RTX_FRAME_RELATED_P (insn) = 1;
29190 else
29191 for (i = 0; i < 8; i++)
29192 if (save_reg_p (CR0_REGNO + i))
29194 rtx insn = emit_insn (gen_movsi_to_cr_one
29195 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29197 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29198 CR field separately, attached to the insn that in fact
29199 restores this particular CR field. */
29200 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29202 add_reg_note (insn, REG_CFA_RESTORE,
29203 gen_rtx_REG (SImode, CR0_REGNO + i));
29205 RTX_FRAME_RELATED_P (insn) = 1;
29209 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
29210 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
29211 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29213 rtx_insn *insn = get_last_insn ();
29214 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29216 add_reg_note (insn, REG_CFA_RESTORE, cr);
29217 RTX_FRAME_RELATED_P (insn) = 1;
29221 /* Like cr, the move to lr instruction can be scheduled after the
29222 stack deallocation, but unlike cr, its stack frame save is still
29223 valid. So we only need to emit the cfa_restore on the correct
29224 instruction. */
29226 static void
29227 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
29229 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
29230 rtx reg = gen_rtx_REG (Pmode, regno);
29232 emit_move_insn (reg, mem);
29235 static void
29236 restore_saved_lr (int regno, bool exit_func)
29238 rtx reg = gen_rtx_REG (Pmode, regno);
29239 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29240 rtx_insn *insn = emit_move_insn (lr, reg);
29242 if (!exit_func && flag_shrink_wrap)
29244 add_reg_note (insn, REG_CFA_RESTORE, lr);
29245 RTX_FRAME_RELATED_P (insn) = 1;
29249 static rtx
29250 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
29252 if (DEFAULT_ABI == ABI_ELFv2)
29254 int i;
29255 for (i = 0; i < 8; i++)
29256 if (save_reg_p (CR0_REGNO + i))
29258 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
29259 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
29260 cfa_restores);
29263 else if (info->cr_save_p)
29264 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29265 gen_rtx_REG (SImode, CR2_REGNO),
29266 cfa_restores);
29268 if (info->lr_save_p)
29269 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29270 gen_rtx_REG (Pmode, LR_REGNO),
29271 cfa_restores);
29272 return cfa_restores;
29275 /* Return true if OFFSET from stack pointer can be clobbered by signals.
29276 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
29277 below the stack pointer that are not clobbered by signals. */
29279 static inline bool
29280 offset_below_red_zone_p (HOST_WIDE_INT offset)
29282 return offset < (DEFAULT_ABI == ABI_V4
29283 ? 0
29284 : TARGET_32BIT ? -220 : -288);
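/* E.g. on 64-bit AIX/ELF a save slot at offset -300 lies below the
   288-byte red zone and so may be clobbered by a signal handler,
   meaning it must be restored before the frame is popped.  */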
29287 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
29289 static void
29290 emit_cfa_restores (rtx cfa_restores)
29292 rtx_insn *insn = get_last_insn ();
29293 rtx *loc = &REG_NOTES (insn);
29295 while (*loc)
29296 loc = &XEXP (*loc, 1);
29297 *loc = cfa_restores;
29298 RTX_FRAME_RELATED_P (insn) = 1;
29301 /* Emit function epilogue as insns. */
29303 void
29304 rs6000_emit_epilogue (int sibcall)
29306 rs6000_stack_t *info;
29307 int restoring_GPRs_inline;
29308 int restoring_FPRs_inline;
29309 int using_load_multiple;
29310 int using_mtcr_multiple;
29311 int use_backchain_to_restore_sp;
29312 int restore_lr;
29313 int strategy;
29314 HOST_WIDE_INT frame_off = 0;
29315 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
29316 rtx frame_reg_rtx = sp_reg_rtx;
29317 rtx cfa_restores = NULL_RTX;
29318 rtx insn;
29319 rtx cr_save_reg = NULL_RTX;
29320 machine_mode reg_mode = Pmode;
29321 int reg_size = TARGET_32BIT ? 4 : 8;
29322 int i;
29323 bool exit_func;
29324 unsigned ptr_regno;
29326 info = rs6000_stack_info ();
29328 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29330 reg_mode = V2SImode;
29331 reg_size = 8;
29334 strategy = info->savres_strategy;
29335 using_load_multiple = strategy & REST_MULTIPLE;
29336 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
29337 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
29338 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
29339 || rs6000_cpu == PROCESSOR_PPC603
29340 || rs6000_cpu == PROCESSOR_PPC750
29341 || optimize_size);
29342 /* Restore via the backchain when we have a large frame, since this
29343 is more efficient than an addis, addi pair. The second condition
29345 here will not trigger at the moment; we don't actually need a
29345 frame pointer for alloca, but the generic parts of the compiler
29346 give us one anyway. */
29347 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
29348 ? info->lr_save_offset
29349 : 0) > 32767
29350 || (cfun->calls_alloca
29351 && !frame_pointer_needed));
29352 restore_lr = (info->lr_save_p
29353 && (restoring_FPRs_inline
29354 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
29355 && (restoring_GPRs_inline
29356 || info->first_fp_reg_save < 64)
29357 && !cfun->machine->lr_is_wrapped_separately);
29360 if (WORLD_SAVE_P (info))
29362 int i, j;
29363 char rname[30];
29364 const char *alloc_rname;
29365 rtvec p;
29367 /* eh_rest_world_r10 will return to the location saved in the LR
29368 stack slot (which is not likely to be our caller).
29369 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
29370 rest_world is similar, except any R10 parameter is ignored.
29371 The exception-handling stuff that was here in 2.95 is no
29372 longer necessary. */
29374 p = rtvec_alloc (9
29375 + 32 - info->first_gp_reg_save
29376 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
29377 + 63 + 1 - info->first_fp_reg_save);
29379 strcpy (rname, ((crtl->calls_eh_return) ?
29380 "*eh_rest_world_r10" : "*rest_world"));
29381 alloc_rname = ggc_strdup (rname);
29383 j = 0;
29384 RTVEC_ELT (p, j++) = ret_rtx;
29385 RTVEC_ELT (p, j++)
29386 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
29387 /* The instruction pattern requires a clobber here;
29388 it is shared with the restVEC helper. */
29389 RTVEC_ELT (p, j++)
29390 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
29393 /* CR register traditionally saved as CR2. */
29394 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
29395 RTVEC_ELT (p, j++)
29396 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
29397 if (flag_shrink_wrap)
29399 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29400 gen_rtx_REG (Pmode, LR_REGNO),
29401 cfa_restores);
29402 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29406 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29408 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29409 RTVEC_ELT (p, j++)
29410 = gen_frame_load (reg,
29411 frame_reg_rtx, info->gp_save_offset + reg_size * i);
29412 if (flag_shrink_wrap)
29413 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29415 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29417 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
29418 RTVEC_ELT (p, j++)
29419 = gen_frame_load (reg,
29420 frame_reg_rtx, info->altivec_save_offset + 16 * i);
29421 if (flag_shrink_wrap)
29422 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29424 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
29426 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29427 ? DFmode : SFmode),
29428 info->first_fp_reg_save + i);
29429 RTVEC_ELT (p, j++)
29430 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
29431 if (flag_shrink_wrap)
29432 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29434 RTVEC_ELT (p, j++)
29435 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
29436 RTVEC_ELT (p, j++)
29437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
29438 RTVEC_ELT (p, j++)
29439 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
29440 RTVEC_ELT (p, j++)
29441 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
29442 RTVEC_ELT (p, j++)
29443 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
29444 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29446 if (flag_shrink_wrap)
29448 REG_NOTES (insn) = cfa_restores;
29449 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29450 RTX_FRAME_RELATED_P (insn) = 1;
29452 return;
29455 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
29456 if (info->push_p)
29457 frame_off = info->total_size;
29459 /* Restore AltiVec registers if we must do so before adjusting the
29460 stack. */
29461 if (info->altivec_size != 0
29462 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29463 || (DEFAULT_ABI != ABI_V4
29464 && offset_below_red_zone_p (info->altivec_save_offset))))
29466 int i;
29467 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29469 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29470 if (use_backchain_to_restore_sp)
29472 int frame_regno = 11;
29474 if ((strategy & REST_INLINE_VRS) == 0)
29476 /* Of r11 and r12, select the one not clobbered by an
29477 out-of-line restore function for the frame register. */
29478 frame_regno = 11 + 12 - scratch_regno;
29480 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
29481 emit_move_insn (frame_reg_rtx,
29482 gen_rtx_MEM (Pmode, sp_reg_rtx));
29483 frame_off = 0;
29485 else if (frame_pointer_needed)
29486 frame_reg_rtx = hard_frame_pointer_rtx;
29488 if ((strategy & REST_INLINE_VRS) == 0)
29490 int end_save = info->altivec_save_offset + info->altivec_size;
29491 int ptr_off;
29492 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29493 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29495 if (end_save + frame_off != 0)
29497 rtx offset = GEN_INT (end_save + frame_off);
29499 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29501 else
29502 emit_move_insn (ptr_reg, frame_reg_rtx);
29504 ptr_off = -end_save;
29505 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29506 info->altivec_save_offset + ptr_off,
29507 0, V4SImode, SAVRES_VR);
29509 else
29511 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29512 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29514 rtx addr, areg, mem, insn;
29515 rtx reg = gen_rtx_REG (V4SImode, i);
29516 HOST_WIDE_INT offset
29517 = (info->altivec_save_offset + frame_off
29518 + 16 * (i - info->first_altivec_reg_save));
29520 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29522 mem = gen_frame_mem (V4SImode,
29523 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29524 GEN_INT (offset)));
29525 insn = gen_rtx_SET (reg, mem);
29527 else
29529 areg = gen_rtx_REG (Pmode, 0);
29530 emit_move_insn (areg, GEN_INT (offset));
29532 /* AltiVec addressing mode is [reg+reg]. */
29533 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29534 mem = gen_frame_mem (V4SImode, addr);
29536 /* Rather than emitting a generic move, force use of the
29537 lvx instruction, which we always want. In particular we
29538 don't want lxvd2x/xxpermdi for little endian. */
29539 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29542 (void) emit_insn (insn);
29546 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29547 if (((strategy & REST_INLINE_VRS) == 0
29548 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29549 && (flag_shrink_wrap
29550 || (offset_below_red_zone_p
29551 (info->altivec_save_offset
29552 + 16 * (i - info->first_altivec_reg_save)))))
29554 rtx reg = gen_rtx_REG (V4SImode, i);
29555 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29559 /* Restore VRSAVE if we must do so before adjusting the stack. */
29560 if (info->vrsave_size != 0
29561 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29562 || (DEFAULT_ABI != ABI_V4
29563 && offset_below_red_zone_p (info->vrsave_save_offset))))
29565 rtx reg;
29567 if (frame_reg_rtx == sp_reg_rtx)
29569 if (use_backchain_to_restore_sp)
29571 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29572 emit_move_insn (frame_reg_rtx,
29573 gen_rtx_MEM (Pmode, sp_reg_rtx));
29574 frame_off = 0;
29576 else if (frame_pointer_needed)
29577 frame_reg_rtx = hard_frame_pointer_rtx;
29580 reg = gen_rtx_REG (SImode, 12);
29581 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29582 info->vrsave_save_offset + frame_off));
29584 emit_insn (generate_set_vrsave (reg, info, 1));
29587 insn = NULL_RTX;
29588 /* If we have a large stack frame, restore the old stack pointer
29589 using the backchain. */
29590 if (use_backchain_to_restore_sp)
29592 if (frame_reg_rtx == sp_reg_rtx)
29594 /* Under V.4, don't reset the stack pointer until after we're done
29595 loading the saved registers. */
29596 if (DEFAULT_ABI == ABI_V4)
29597 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29599 insn = emit_move_insn (frame_reg_rtx,
29600 gen_rtx_MEM (Pmode, sp_reg_rtx));
29601 frame_off = 0;
29603 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29604 && DEFAULT_ABI == ABI_V4)
29605 /* frame_reg_rtx has been set up by the altivec restore. */
29606 ;
29607 else
29609 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
29610 frame_reg_rtx = sp_reg_rtx;
29613 /* If we have a frame pointer, we can restore the old stack pointer
29614 from it. */
29615 else if (frame_pointer_needed)
29617 frame_reg_rtx = sp_reg_rtx;
29618 if (DEFAULT_ABI == ABI_V4)
29619 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29620 /* Prevent reordering memory accesses against stack pointer restore. */
29621 else if (cfun->calls_alloca
29622 || offset_below_red_zone_p (-info->total_size))
29623 rs6000_emit_stack_tie (frame_reg_rtx, true);
29625 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
29626 GEN_INT (info->total_size)));
29627 frame_off = 0;
29629 else if (info->push_p
29630 && DEFAULT_ABI != ABI_V4
29631 && !crtl->calls_eh_return)
29633 /* Prevent reordering memory accesses against stack pointer restore. */
29634 if (cfun->calls_alloca
29635 || offset_below_red_zone_p (-info->total_size))
29636 rs6000_emit_stack_tie (frame_reg_rtx, false);
29637 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
29638 GEN_INT (info->total_size)));
29639 frame_off = 0;
29641 if (insn && frame_reg_rtx == sp_reg_rtx)
29643 if (cfa_restores)
29645 REG_NOTES (insn) = cfa_restores;
29646 cfa_restores = NULL_RTX;
29648 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29649 RTX_FRAME_RELATED_P (insn) = 1;
29652 /* Restore AltiVec registers if we have not done so already. */
29653 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29654 && info->altivec_size != 0
29655 && (DEFAULT_ABI == ABI_V4
29656 || !offset_below_red_zone_p (info->altivec_save_offset)))
29658 int i;
29660 if ((strategy & REST_INLINE_VRS) == 0)
29662 int end_save = info->altivec_save_offset + info->altivec_size;
29663 int ptr_off;
29664 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29665 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29666 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29668 if (end_save + frame_off != 0)
29670 rtx offset = GEN_INT (end_save + frame_off);
29672 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29674 else
29675 emit_move_insn (ptr_reg, frame_reg_rtx);
29677 ptr_off = -end_save;
29678 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29679 info->altivec_save_offset + ptr_off,
29680 0, V4SImode, SAVRES_VR);
29681 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29683 /* Frame reg was clobbered by out-of-line save. Restore it
29684 from ptr_reg, and if we are calling an out-of-line gpr or
29685 fpr restore, set up the correct pointer and offset. */
29686 unsigned newptr_regno = 1;
29687 if (!restoring_GPRs_inline)
29689 bool lr = info->gp_save_offset + info->gp_size == 0;
29690 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29691 newptr_regno = ptr_regno_for_savres (sel);
29692 end_save = info->gp_save_offset + info->gp_size;
29694 else if (!restoring_FPRs_inline)
29696 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
29697 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29698 newptr_regno = ptr_regno_for_savres (sel);
29699 end_save = info->fp_save_offset + info->fp_size;
29702 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
29703 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
29705 if (end_save + ptr_off != 0)
29707 rtx offset = GEN_INT (end_save + ptr_off);
29709 frame_off = -end_save;
29710 if (TARGET_32BIT)
29711 emit_insn (gen_addsi3_carry (frame_reg_rtx,
29712 ptr_reg, offset));
29713 else
29714 emit_insn (gen_adddi3_carry (frame_reg_rtx,
29715 ptr_reg, offset));
29717 else
29719 frame_off = ptr_off;
29720 emit_move_insn (frame_reg_rtx, ptr_reg);
29724 else
29726 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29727 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29729 rtx addr, areg, mem, insn;
29730 rtx reg = gen_rtx_REG (V4SImode, i);
29731 HOST_WIDE_INT offset
29732 = (info->altivec_save_offset + frame_off
29733 + 16 * (i - info->first_altivec_reg_save));
29735 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29737 mem = gen_frame_mem (V4SImode,
29738 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29739 GEN_INT (offset)));
29740 insn = gen_rtx_SET (reg, mem);
29742 else
29744 areg = gen_rtx_REG (Pmode, 0);
29745 emit_move_insn (areg, GEN_INT (offset));
29747 /* AltiVec addressing mode is [reg+reg]. */
29748 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29749 mem = gen_frame_mem (V4SImode, addr);
29751 /* Rather than emitting a generic move, force use of the
29752 lvx instruction, which we always want. In particular we
29753 don't want lxvd2x/xxpermdi for little endian. */
29754 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29757 (void) emit_insn (insn);
29761 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29762 if (((strategy & REST_INLINE_VRS) == 0
29763 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29764 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29766 rtx reg = gen_rtx_REG (V4SImode, i);
29767 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29771 /* Restore VRSAVE if we have not done so already. */
29772 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29773 && info->vrsave_size != 0
29774 && (DEFAULT_ABI == ABI_V4
29775 || !offset_below_red_zone_p (info->vrsave_save_offset)))
29777 rtx reg;
29779 reg = gen_rtx_REG (SImode, 12);
29780 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29781 info->vrsave_save_offset + frame_off));
29783 emit_insn (generate_set_vrsave (reg, info, 1));
29786 /* If we exit by an out-of-line restore function on ABI_V4 then that
29787 function will deallocate the stack, so we don't need to worry
29788 about the unwinder restoring cr from an invalid stack frame
29789 location. */
29790 exit_func = (!restoring_FPRs_inline
29791 || (!restoring_GPRs_inline
29792 && info->first_fp_reg_save == 64));
29794 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
29795 *separate* slots if the routine calls __builtin_eh_return, so
29796 that they can be independently restored by the unwinder. */
29797 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29799 int i, cr_off = info->ehcr_offset;
29801 for (i = 0; i < 8; i++)
29802 if (!call_used_regs[CR0_REGNO + i])
29804 rtx reg = gen_rtx_REG (SImode, 0);
29805 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29806 cr_off + frame_off));
29808 insn = emit_insn (gen_movsi_to_cr_one
29809 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29811 if (!exit_func && flag_shrink_wrap)
29813 add_reg_note (insn, REG_CFA_RESTORE,
29814 gen_rtx_REG (SImode, CR0_REGNO + i));
29816 RTX_FRAME_RELATED_P (insn) = 1;
29819 cr_off += reg_size;
29823 /* Get the old lr if we saved it. If we are restoring registers
29824 out-of-line, then the out-of-line routines can do this for us. */
29825 if (restore_lr && restoring_GPRs_inline)
29826 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29828 /* Get the old cr if we saved it. */
29829 if (info->cr_save_p)
29831 unsigned cr_save_regno = 12;
29833 if (!restoring_GPRs_inline)
29835 /* Ensure we don't use the register used by the out-of-line
29836 gpr register restore below. */
29837 bool lr = info->gp_save_offset + info->gp_size == 0;
29838 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29839 int gpr_ptr_regno = ptr_regno_for_savres (sel);
29841 if (gpr_ptr_regno == 12)
29842 cr_save_regno = 11;
29843 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
29845 else if (REGNO (frame_reg_rtx) == 12)
29846 cr_save_regno = 11;
29848 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
29849 info->cr_save_offset + frame_off,
29850 exit_func);
29853 /* Set LR here to try to overlap restores below. */
29854 if (restore_lr && restoring_GPRs_inline)
29855 restore_saved_lr (0, exit_func);
29857 /* Load exception handler data registers, if needed. */
29858 if (crtl->calls_eh_return)
29860 unsigned int i, regno;
29862 if (TARGET_AIX)
29864 rtx reg = gen_rtx_REG (reg_mode, 2);
29865 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29866 frame_off + RS6000_TOC_SAVE_SLOT));
29869 for (i = 0; ; ++i)
29871 rtx mem;
29873 regno = EH_RETURN_DATA_REGNO (i);
29874 if (regno == INVALID_REGNUM)
29875 break;
29877 /* Note: possible use of r0 here to address SPE regs. */
29878 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
29879 info->ehrd_offset + frame_off
29880 + reg_size * (int) i);
29882 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
29886 /* Restore GPRs. This is done as a PARALLEL if we are using
29887 the load-multiple instructions. */
29888 if (TARGET_SPE_ABI
29889 && info->spe_64bit_regs_used
29890 && info->first_gp_reg_save != 32)
29892 /* Determine whether we can address all of the registers that need
29893 to be saved with an offset from frame_reg_rtx that fits in
29894 the small const field for SPE memory instructions. */
29895 int spe_regs_addressable
29896 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29897 + reg_size * (32 - info->first_gp_reg_save - 1))
29898 && restoring_GPRs_inline);
29900 if (!spe_regs_addressable)
29902 int ool_adjust = 0;
29903 rtx old_frame_reg_rtx = frame_reg_rtx;
29904 /* Make r11 point to the start of the SPE save area. We worried about
29905 not clobbering it when we were saving registers in the prologue.
29906 There's no need to worry here because the static chain is passed
29907 anew to every function. */
29909 if (!restoring_GPRs_inline)
29910 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29911 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29912 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
29913 GEN_INT (info->spe_gp_save_offset
29914 + frame_off
29915 - ool_adjust)));
29916 /* Keep the invariant that frame_reg_rtx + frame_off points
29917 at the top of the stack frame. */
29918 frame_off = -info->spe_gp_save_offset + ool_adjust;
29921 if (restoring_GPRs_inline)
29923 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
29925 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29926 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29928 rtx offset, addr, mem, reg;
29930 /* We're doing all this to ensure that the immediate offset
29931 fits into the immediate field of 'evldd'. */
29932 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
29934 offset = GEN_INT (spe_offset + reg_size * i);
29935 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
29936 mem = gen_rtx_MEM (V2SImode, addr);
29937 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29939 emit_move_insn (reg, mem);
29942 else
29943 rs6000_emit_savres_rtx (info, frame_reg_rtx,
29944 info->spe_gp_save_offset + frame_off,
29945 info->lr_save_offset + frame_off,
29946 reg_mode,
29947 SAVRES_GPR | SAVRES_LR);
29949 else if (!restoring_GPRs_inline)
29951 /* We are jumping to an out-of-line function. */
29952 rtx ptr_reg;
29953 int end_save = info->gp_save_offset + info->gp_size;
29954 bool can_use_exit = end_save == 0;
29955 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
29956 int ptr_off;
29958 /* Emit stack reset code if we need it. */
29959 ptr_regno = ptr_regno_for_savres (sel);
29960 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29961 if (can_use_exit)
29962 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29963 else if (end_save + frame_off != 0)
29964 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
29965 GEN_INT (end_save + frame_off)));
29966 else if (REGNO (frame_reg_rtx) != ptr_regno)
29967 emit_move_insn (ptr_reg, frame_reg_rtx);
29968 if (REGNO (frame_reg_rtx) == ptr_regno)
29969 frame_off = -end_save;
29971 if (can_use_exit && info->cr_save_p)
29972 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
29974 ptr_off = -end_save;
29975 rs6000_emit_savres_rtx (info, ptr_reg,
29976 info->gp_save_offset + ptr_off,
29977 info->lr_save_offset + ptr_off,
29978 reg_mode, sel);
29980 else if (using_load_multiple)
29982 rtvec p;
29983 p = rtvec_alloc (32 - info->first_gp_reg_save);
29984 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29985 RTVEC_ELT (p, i)
29986 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29987 frame_reg_rtx,
29988 info->gp_save_offset + frame_off + reg_size * i);
29989 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29991 else
29993 int offset = info->gp_save_offset + frame_off;
29994 for (i = info->first_gp_reg_save; i < 32; i++)
29996 if (rs6000_reg_live_or_pic_offset_p (i)
29997 && !cfun->machine->gpr_is_wrapped_separately[i])
29999 rtx reg = gen_rtx_REG (reg_mode, i);
30000 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
30003 offset += reg_size;
30007 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
30009 /* If the frame pointer was used then we can't delay emitting
30010 a REG_CFA_DEF_CFA note. This must happen on the insn that
30011 restores the frame pointer, r31. We may have already emitted
30012 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
30013 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
30014 be harmless if emitted. */
30015 if (frame_pointer_needed)
30017 insn = get_last_insn ();
30018 add_reg_note (insn, REG_CFA_DEF_CFA,
30019 plus_constant (Pmode, frame_reg_rtx, frame_off));
30020 RTX_FRAME_RELATED_P (insn) = 1;
30023 /* Set up cfa_restores. We always need these when
30024 shrink-wrapping. If not shrink-wrapping then we only need
30025 the cfa_restore when the stack location is no longer valid.
30026 The cfa_restores must be emitted on or before the insn that
30027 invalidates the stack, and of course must not be emitted
30028 before the insn that actually does the restore. The latter
30029 is why it is a bad idea to emit the cfa_restores as a group
30030 on the last instruction here that actually does a restore:
30031 That insn may be reordered with respect to others doing
30032 restores. */
30033 if (flag_shrink_wrap
30034 && !restoring_GPRs_inline
30035 && info->first_fp_reg_save == 64)
30036 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30038 for (i = info->first_gp_reg_save; i < 32; i++)
30039 if (!restoring_GPRs_inline
30040 || using_load_multiple
30041 || rs6000_reg_live_or_pic_offset_p (i))
30043 if (cfun->machine->gpr_is_wrapped_separately[i])
30044 continue;
30046 rtx reg = gen_rtx_REG (reg_mode, i);
30047 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30051 if (!restoring_GPRs_inline
30052 && info->first_fp_reg_save == 64)
30054 /* We are jumping to an out-of-line function. */
30055 if (cfa_restores)
30056 emit_cfa_restores (cfa_restores);
30057 return;
30060 if (restore_lr && !restoring_GPRs_inline)
30062 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
30063 restore_saved_lr (0, exit_func);
30066 /* Restore fpr's if we need to do it without calling a function. */
30067 if (restoring_FPRs_inline)
30068 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30069 if (save_reg_p (info->first_fp_reg_save + i))
30071 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
30072 ? DFmode : SFmode),
30073 info->first_fp_reg_save + i);
30074 emit_insn (gen_frame_load (reg, frame_reg_rtx,
30075 info->fp_save_offset + frame_off + 8 * i));
30076 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
30077 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30080 /* If we saved cr, restore it here. Just those that were used. */
30081 if (info->cr_save_p)
30082 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
30084 /* If this is V.4, unwind the stack pointer after all of the loads
30085 have been done, or set up r11 if we are restoring fp out of line. */
30086 ptr_regno = 1;
30087 if (!restoring_FPRs_inline)
30089 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30090 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30091 ptr_regno = ptr_regno_for_savres (sel);
30094 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
30095 if (REGNO (frame_reg_rtx) == ptr_regno)
30096 frame_off = 0;
30098 if (insn && restoring_FPRs_inline)
30100 if (cfa_restores)
30102 REG_NOTES (insn) = cfa_restores;
30103 cfa_restores = NULL_RTX;
30105 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30106 RTX_FRAME_RELATED_P (insn) = 1;
30109 if (crtl->calls_eh_return)
30111 rtx sa = EH_RETURN_STACKADJ_RTX;
30112 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
30115 if (!sibcall && restoring_FPRs_inline)
30117 if (cfa_restores)
30119 /* We can't hang the cfa_restores off a simple return,
30120 since the shrink-wrap code sometimes uses an existing
30121 return. This means there might be a path from
30122 pre-prologue code to this return, and dwarf2cfi code
30123 wants the eh_frame unwinder state to be the same on
30124 all paths to any point. So we need to emit the
30125 cfa_restores before the return. For -m64 we really
30126 don't need epilogue cfa_restores at all, except for
30127 this irritating dwarf2cfi with shrink-wrap
30128 requirement; the stack red-zone means eh_frame info
30129 from the prologue telling the unwinder to restore
30130 from the stack is perfectly good right to the end of
30131 the function. */
30132 emit_insn (gen_blockage ());
30133 emit_cfa_restores (cfa_restores);
30134 cfa_restores = NULL_RTX;
30137 emit_jump_insn (targetm.gen_simple_return ());
30140 if (!sibcall && !restoring_FPRs_inline)
30142 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30143 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
30144 int elt = 0;
30145 RTVEC_ELT (p, elt++) = ret_rtx;
30146 if (lr)
30147 RTVEC_ELT (p, elt++)
30148 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
30150 /* We have to restore more than two FP registers, so branch to the
30151 restore function. It will return to our caller. */
30152 int i;
30153 int reg;
30154 rtx sym;
30156 if (flag_shrink_wrap)
30157 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30159 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
30160 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
30161 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
30162 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
30164 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30166 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
30168 RTVEC_ELT (p, elt++)
30169 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
30170 if (flag_shrink_wrap)
30171 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30174 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30177 if (cfa_restores)
30179 if (sibcall)
30180 /* Ensure the cfa_restores are hung off an insn that won't
30181 be reordered above other restores. */
30182 emit_insn (gen_blockage ());
30184 emit_cfa_restores (cfa_restores);
30188 /* Write function epilogue. */
30190 static void
30191 rs6000_output_function_epilogue (FILE *file,
30192 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
30194 #if TARGET_MACHO
30195 macho_branch_islands ();
30196 /* Mach-O doesn't support labels at the end of objects, so if
30197 it looks like we might want one, insert a NOP. */
30199 rtx_insn *insn = get_last_insn ();
30200 rtx_insn *deleted_debug_label = NULL;
30201 while (insn
30202 && NOTE_P (insn)
30203 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
30205 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
30206 notes; instead set their CODE_LABEL_NUMBER to -1, since
30207 otherwise there would be code generation differences
30208 between -g and -g0. */
30209 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30210 deleted_debug_label = insn;
30211 insn = PREV_INSN (insn);
30213 if (insn
30214 && (LABEL_P (insn)
30215 || (NOTE_P (insn)
30216 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
30217 fputs ("\tnop\n", file);
30218 else if (deleted_debug_label)
30219 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
30220 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30221 CODE_LABEL_NUMBER (insn) = -1;
30223 #endif
30225 /* Output a traceback table here. See /usr/include/sys/debug.h for info
30226 on its format.
30228 We don't output a traceback table if -finhibit-size-directive was
30229 used. The documentation for -finhibit-size-directive reads
30230 ``don't output a @code{.size} assembler directive, or anything
30231 else that would cause trouble if the function is split in the
30232 middle, and the two halves are placed at locations far apart in
30233 memory.'' The traceback table has this property, since it
30234 includes the offset from the start of the function to the
30235 traceback table itself.
30237 System V.4 PowerPC (and the embedded ABI derived from it) uses a
30238 different traceback table. */
30239 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30240 && ! flag_inhibit_size_directive
30241 && rs6000_traceback != traceback_none && !cfun->is_thunk)
30243 const char *fname = NULL;
30244 const char *language_string = lang_hooks.name;
30245 int fixed_parms = 0, float_parms = 0, parm_info = 0;
30246 int i;
30247 int optional_tbtab;
30248 rs6000_stack_t *info = rs6000_stack_info ();
30250 if (rs6000_traceback == traceback_full)
30251 optional_tbtab = 1;
30252 else if (rs6000_traceback == traceback_part)
30253 optional_tbtab = 0;
30254 else
30255 optional_tbtab = !optimize_size && !TARGET_ELF;
30257 if (optional_tbtab)
30259 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30260 while (*fname == '.') /* V.4 encodes . in the name */
30261 fname++;
30263 /* Need label immediately before tbtab, so we can compute
30264 its offset from the function start. */
30265 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30266 ASM_OUTPUT_LABEL (file, fname);
30269 /* The .tbtab pseudo-op can only be used for the first eight
30270 expressions, since it can't handle the possibly variable
30271 length fields that follow. However, if you omit the optional
30272 fields, the assembler outputs zeros for all optional fields
30273 anyway, giving each variable length field its minimum length
30274 (as defined in sys/debug.h). Thus we cannot use the .tbtab
30275 pseudo-op at all. */
30277 /* An all-zero word flags the start of the tbtab, for debuggers
30278 that have to find it by searching forward from the entry
30279 point or from the current pc. */
30280 fputs ("\t.long 0\n", file);
30282 /* Tbtab format type. Use format type 0. */
30283 fputs ("\t.byte 0,", file);
30285 /* Language type. Unfortunately, there does not seem to be any
30286 official way to discover the language being compiled, so we
30287 use language_string.
30288 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
30289 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
30290 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
30291 either, so for now use 0. */
30292 if (lang_GNU_C ()
30293 || ! strcmp (language_string, "GNU GIMPLE")
30294 || ! strcmp (language_string, "GNU Go")
30295 || ! strcmp (language_string, "libgccjit"))
30296 i = 0;
30297 else if (! strcmp (language_string, "GNU F77")
30298 || lang_GNU_Fortran ())
30299 i = 1;
30300 else if (! strcmp (language_string, "GNU Pascal"))
30301 i = 2;
30302 else if (! strcmp (language_string, "GNU Ada"))
30303 i = 3;
30304 else if (lang_GNU_CXX ()
30305 || ! strcmp (language_string, "GNU Objective-C++"))
30306 i = 9;
30307 else if (! strcmp (language_string, "GNU Java"))
30308 i = 13;
30309 else if (! strcmp (language_string, "GNU Objective-C"))
30310 i = 14;
30311 else
30312 gcc_unreachable ();
30313 fprintf (file, "%d,", i);
30315 /* 8 single bit fields: global linkage (not set for C extern linkage,
30316 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
30317 from start of procedure stored in tbtab, internal function, function
30318 has controlled storage, function has no toc, function uses fp,
30319 function logs/aborts fp operations. */
30320 /* Assume that fp operations are used if any fp reg must be saved. */
30321 fprintf (file, "%d,",
30322 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
30324 /* 6 bitfields: function is interrupt handler, name present in
30325 proc table, function calls alloca, on condition directives
30326 (controls stack walks, 3 bits), saves condition reg, saves
30327 link reg. */
30328 /* The `function calls alloca' bit seems to be set whenever reg 31 is
30329 set up as a frame pointer, even when there is no alloca call. */
30330 fprintf (file, "%d,",
30331 ((optional_tbtab << 6)
30332 | ((optional_tbtab & frame_pointer_needed) << 5)
30333 | (info->cr_save_p << 1)
30334 | (info->lr_save_p)));
30336 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
30337 (6 bits). */
30338 fprintf (file, "%d,",
30339 (info->push_p << 7) | (64 - info->first_fp_reg_save));
30341 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
30342 fprintf (file, "%d,", (32 - first_reg_to_save ()));
30344 if (optional_tbtab)
30346 /* Compute the parameter info from the function decl argument
30347 list. */
30348 tree decl;
30349 int next_parm_info_bit = 31;
30351 for (decl = DECL_ARGUMENTS (current_function_decl);
30352 decl; decl = DECL_CHAIN (decl))
30354 rtx parameter = DECL_INCOMING_RTL (decl);
30355 machine_mode mode = GET_MODE (parameter);
30357 if (GET_CODE (parameter) == REG)
30359 if (SCALAR_FLOAT_MODE_P (mode))
30361 int bits;
30363 float_parms++;
30365 switch (mode)
30367 case SFmode:
30368 case SDmode:
30369 bits = 0x2;
30370 break;
30372 case DFmode:
30373 case DDmode:
30374 case TFmode:
30375 case TDmode:
30376 case IFmode:
30377 case KFmode:
30378 bits = 0x3;
30379 break;
30381 default:
30382 gcc_unreachable ();
30385 /* If only one bit will fit, don't or in this entry. */
30386 if (next_parm_info_bit > 0)
30387 parm_info |= (bits << (next_parm_info_bit - 1));
30388 next_parm_info_bit -= 2;
30390 else
30392 fixed_parms += ((GET_MODE_SIZE (mode)
30393 + (UNITS_PER_WORD - 1))
30394 / UNITS_PER_WORD);
30395 next_parm_info_bit -= 1;
30401 /* Number of fixed point parameters. */
30402 /* This is actually the number of words of fixed point parameters; thus
30403 an 8 byte struct counts as 2; and thus the maximum value is 8. */
30404 fprintf (file, "%d,", fixed_parms);
30406 /* 2 bitfields: number of floating point parameters (7 bits), parameters
30407 all on stack. */
30408 /* This is actually the number of fp registers that hold parameters;
30409 and thus the maximum value is 13. */
30410 /* Set parameters on stack bit if parameters are not in their original
30411 registers, regardless of whether they are on the stack? Xlc
30412 seems to set the bit when not optimizing. */
30413 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
30415 if (optional_tbtab)
30417 /* Optional fields follow. Some are variable length. */
30419 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
30420 float, 11 double float. */
30421 /* There is an entry for each parameter in a register, in the order
30422 that they occur in the parameter list. Any intervening arguments
30423 on the stack are ignored. If the list overflows a long (max
30424 possible length 34 bits) then completely leave off all elements
30425 that don't fit. */
30426 /* Only emit this long if there was at least one parameter. */
30427 if (fixed_parms || float_parms)
30428 fprintf (file, "\t.long %d\n", parm_info);
30430 /* Offset from start of code to tb table. */
30431 fputs ("\t.long ", file);
30432 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30433 RS6000_OUTPUT_BASENAME (file, fname);
30434 putc ('-', file);
30435 rs6000_output_function_entry (file, fname);
30436 putc ('\n', file);
30438 /* Interrupt handler mask. */
30439 /* Omit this long, since we never set the interrupt handler bit
30440 above. */
30442 /* Number of CTL (controlled storage) anchors. */
30443 /* Omit this long, since the has_ctl bit is never set above. */
30445 /* Displacement into stack of each CTL anchor. */
30446 /* Omit this list of longs, because there are no CTL anchors. */
30448 /* Length of function name. */
30449 if (*fname == '*')
30450 ++fname;
30451 fprintf (file, "\t.short %d\n", (int) strlen (fname));
30453 /* Function name. */
30454 assemble_string (fname, strlen (fname));
30456 /* Register for alloca automatic storage; this is always reg 31.
30457 Only emit this if the alloca bit was set above. */
30458 if (frame_pointer_needed)
30459 fputs ("\t.byte 31\n", file);
30461 fputs ("\t.align 2\n", file);
30465 /* Arrange to define .LCTOC1 label, if not already done. */
30466 if (need_toc_init)
30468 need_toc_init = 0;
30469 if (!toc_initialized)
30471 switch_to_section (toc_section);
30472 switch_to_section (current_function_section ());
30477 /* -fsplit-stack support. */
30479 /* A SYMBOL_REF for __morestack. */
30480 static GTY(()) rtx morestack_ref;
30482 static rtx
30483 gen_add3_const (rtx rt, rtx ra, long c)
30485 if (TARGET_64BIT)
30486 return gen_adddi3 (rt, ra, GEN_INT (c));
30487 else
30488 return gen_addsi3 (rt, ra, GEN_INT (c));
30491 /* Emit -fsplit-stack prologue, which goes before the regular function
30492 prologue (at local entry point in the case of ELFv2). */
30494 void
30495 rs6000_expand_split_stack_prologue (void)
30497 rs6000_stack_t *info = rs6000_stack_info ();
30498 unsigned HOST_WIDE_INT allocate;
30499 long alloc_hi, alloc_lo;
30500 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
30501 rtx_insn *insn;
30503 gcc_assert (flag_split_stack && reload_completed);
30505 if (!info->push_p)
30506 return;
30508 if (global_regs[29])
30510 error ("-fsplit-stack uses register r29");
30511 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
30512 "conflicts with %qD", global_regs_decl[29]);
30515 allocate = info->total_size;
30516 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
30518 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
30519 return;
30521 if (morestack_ref == NULL_RTX)
30523 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
30524 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
30525 | SYMBOL_FLAG_FUNCTION);
30528 r0 = gen_rtx_REG (Pmode, 0);
30529 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30530 r12 = gen_rtx_REG (Pmode, 12);
30531 emit_insn (gen_load_split_stack_limit (r0));
30532 /* Always emit two insns here to calculate the requested stack,
30533 so that the linker can edit them when adjusting size for calling
30534 non-split-stack code. */
30535 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
30536 alloc_lo = -allocate - alloc_hi;
30537 if (alloc_hi != 0)
30539 emit_insn (gen_add3_const (r12, r1, alloc_hi));
30540 if (alloc_lo != 0)
30541 emit_insn (gen_add3_const (r12, r12, alloc_lo));
30542 else
30543 emit_insn (gen_nop ());
30545 else
30547 emit_insn (gen_add3_const (r12, r1, alloc_lo));
30548 emit_insn (gen_nop ());
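  /* Worked example with a hypothetical allocate of 0x12345:
     alloc_hi = (-0x12345 + 0x8000) & ~0xffff = -0x10000 and
     alloc_lo = -0x12345 - (-0x10000) = -0x2345, so the two adds sum
     to -allocate and alloc_lo fits a signed 16-bit immediate.  */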
30551 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30552 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
30553 ok_label = gen_label_rtx ();
30554 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30555 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
30556 gen_rtx_LABEL_REF (VOIDmode, ok_label),
30557 pc_rtx);
30558 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30559 JUMP_LABEL (jump) = ok_label;
30560 /* Mark the jump as very likely to be taken. */
30561 add_int_reg_note (jump, REG_BR_PROB,
30562 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
30564 lr = gen_rtx_REG (Pmode, LR_REGNO);
30565 insn = emit_move_insn (r0, lr);
30566 RTX_FRAME_RELATED_P (insn) = 1;
30567 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
30568 RTX_FRAME_RELATED_P (insn) = 1;
30570 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
30571 const0_rtx, const0_rtx));
30572 call_fusage = NULL_RTX;
30573 use_reg (&call_fusage, r12);
30574 /* Say the call uses r0, even though it doesn't, to stop regrename
30575 from twiddling with the insns saving lr, trashing args for cfun.
30576 The insns restoring lr are similarly protected by making
30577 split_stack_return use r0. */
30578 use_reg (&call_fusage, r0);
30579 add_function_usage_to (insn, call_fusage);
30580 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
30581 insn = emit_move_insn (lr, r0);
30582 add_reg_note (insn, REG_CFA_RESTORE, lr);
30583 RTX_FRAME_RELATED_P (insn) = 1;
30584 emit_insn (gen_split_stack_return ());
30586 emit_label (ok_label);
30587 LABEL_NUSES (ok_label) = 1;
30590 /* Return the internal arg pointer used for function incoming
30591 arguments. When -fsplit-stack, the arg pointer is r12 so we need
30592 to copy it to a pseudo in order for it to be preserved over calls
30593 and suchlike. We'd really like to use a pseudo here for the
30594 internal arg pointer but data-flow analysis is not prepared to
30595 accept pseudos as live at the beginning of a function. */
30597 static rtx
30598 rs6000_internal_arg_pointer (void)
30600 if (flag_split_stack
30601 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
30602 == NULL))
30605 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
30607 rtx pat;
30609 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
30610 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
30612 /* Put the pseudo initialization right after the note at the
30613 beginning of the function. */
30614 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
30615 gen_rtx_REG (Pmode, 12));
30616 push_topmost_sequence ();
30617 emit_insn_after (pat, get_insns ());
30618 pop_topmost_sequence ();
30620 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
30621 FIRST_PARM_OFFSET (current_function_decl));
30623 return virtual_incoming_args_rtx;
30626 /* We may have to tell the dataflow pass that the split stack prologue
30627 is initializing a register. */
30629 static void
30630 rs6000_live_on_entry (bitmap regs)
30632 if (flag_split_stack)
30633 bitmap_set_bit (regs, 12);
30636 /* Emit -fsplit-stack dynamic stack allocation space check. */
30638 void
30639 rs6000_split_stack_space_check (rtx size, rtx label)
30641 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30642 rtx limit = gen_reg_rtx (Pmode);
30643 rtx requested = gen_reg_rtx (Pmode);
30644 rtx cmp = gen_reg_rtx (CCUNSmode);
30645 rtx jump;
30647 emit_insn (gen_load_split_stack_limit (limit));
30648 if (CONST_INT_P (size))
30649 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
30650 else
30652 size = force_reg (Pmode, size);
30653 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
30655 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
30656 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30657 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
30658 gen_rtx_LABEL_REF (VOIDmode, label),
30659 pc_rtx);
30660 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30661 JUMP_LABEL (jump) = label;
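  /* The jump above is taken when the tentative new stack pointer is
     still at or above the split-stack limit; whatever the caller emits
     next forms the fall-through (overflow) path.  */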
30664 /* A C compound statement that outputs the assembler code for a thunk
30665 function, used to implement C++ virtual function calls with
30666 multiple inheritance. The thunk acts as a wrapper around a virtual
30667 function, adjusting the implicit object parameter before handing
30668 control off to the real function.
30670 First, emit code to add the integer DELTA to the location that
30671 contains the incoming first argument. Assume that this argument
30672 contains a pointer, and is the one used to pass the `this' pointer
30673 in C++. This is the incoming argument *before* the function
30674 prologue, e.g. `%o0' on a sparc. The addition must preserve the
30675 values of all other incoming arguments.
30677 After the addition, emit code to jump to FUNCTION, which is a
30678 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
30679 not touch the return address. Hence returning from FUNCTION will
30680 return to whoever called the current `thunk'.
30682 The effect must be as if FUNCTION had been called directly with the
30683 adjusted first argument. This macro is responsible for emitting
30684 all of the code for a thunk function; output_function_prologue()
30685 and output_function_epilogue() are not invoked.
30687 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
30688 been extracted from it.) It might possibly be useful on some
30689 targets, but probably not.
30691 If you do not define this macro, the target-independent code in the
30692 C++ frontend will generate a less efficient heavyweight thunk that
30693 calls FUNCTION instead of jumping to it. The generic approach does
30694 not support varargs. */
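/* A hypothetical C++ illustration: for "struct C : A, B" where C
   overrides a virtual function inherited from B, the thunk emitted
   here adjusts the incoming B* by DELTA (the offset of the B
   subobject within C), optionally adds an offset loaded from the
   vtable via VCALL_OFFSET, and then tail-calls C's implementation.  */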
30696 static void
30697 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
30698 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
30699 tree function)
30701 rtx this_rtx, funexp;
30702 rtx_insn *insn;
30704 reload_completed = 1;
30705 epilogue_completed = 1;
30707 /* Mark the end of the (empty) prologue. */
30708 emit_note (NOTE_INSN_PROLOGUE_END);
30710 /* Find the "this" pointer. If the function returns a structure,
30711 the structure return pointer is in r3. */
30712 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
30713 this_rtx = gen_rtx_REG (Pmode, 4);
30714 else
30715 this_rtx = gen_rtx_REG (Pmode, 3);
30717 /* Apply the constant offset, if required. */
30718 if (delta)
30719 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
30721 /* Apply the offset from the vtable, if required. */
30722 if (vcall_offset)
30724 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
30725 rtx tmp = gen_rtx_REG (Pmode, 12);
30727 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
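      /* The test below is true iff vcall_offset lies outside
	 [-0x8000, 0x7fff] and thus cannot serve as a 16-bit d-form
	 displacement, so it must be added in a separate instruction.  */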
30728 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
30730 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
30731 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
30733 else
30735 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
30737 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
30739 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
30742 /* Generate a tail call to the target function. */
30743 if (!TREE_USED (function))
30745 assemble_external (function);
30746 TREE_USED (function) = 1;
30748 funexp = XEXP (DECL_RTL (function), 0);
30749 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
30751 #if TARGET_MACHO
30752 if (MACHOPIC_INDIRECT)
30753 funexp = machopic_indirect_call_target (funexp);
30754 #endif
30756 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
30757 generate sibcall RTL explicitly. */
30758 insn = emit_call_insn (
30759 gen_rtx_PARALLEL (VOIDmode,
30760 gen_rtvec (3,
30761 gen_rtx_CALL (VOIDmode,
30762 funexp, const0_rtx),
30763 gen_rtx_USE (VOIDmode, const0_rtx),
30764 simple_return_rtx)));
30765 SIBLING_CALL_P (insn) = 1;
30766 emit_barrier ();
30768 /* Run just enough of rest_of_compilation to get the insns emitted.
30769 There's not really enough bulk here to make other passes such as
30770 instruction scheduling worth while. Note that use_thunk calls
30771 assemble_start_function and assemble_end_function. */
30772 insn = get_insns ();
30773 shorten_branches (insn);
30774 final_start_function (insn, file, 1);
30775 final (insn, file, 1);
30776 final_end_function ();
30778 reload_completed = 0;
30779 epilogue_completed = 0;
30782 /* A quick summary of the various types of 'constant-pool tables'
30783 under PowerPC:
30785 Target Flags Name One table per
30786 AIX (none) AIX TOC object file
30787 AIX -mfull-toc AIX TOC object file
30788 AIX -mminimal-toc AIX minimal TOC translation unit
30789 SVR4/EABI (none) SVR4 SDATA object file
30790 SVR4/EABI -fpic SVR4 pic object file
30791 SVR4/EABI -fPIC SVR4 PIC translation unit
30792 SVR4/EABI -mrelocatable EABI TOC function
30793 SVR4/EABI -maix AIX TOC object file
30794 SVR4/EABI -maix -mminimal-toc
30795 AIX minimal TOC translation unit
30797 Name Reg. Set by entries contains:
30798 made by addrs? fp? sum?
30800 AIX TOC 2 crt0 as Y option option
30801 AIX minimal TOC 30 prolog gcc Y Y option
30802 SVR4 SDATA 13 crt0 gcc N Y N
30803 SVR4 pic 30 prolog ld Y not yet N
30804 SVR4 PIC 30 prolog gcc Y option option
30805 EABI TOC 30 prolog gcc Y option option
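/* For illustration, a TOC entry for a hypothetical symbol `foo' comes
   out roughly as one of

       .tc foo[TC],foo       (AIX-style full TOC)
       .long foo             (32-bit minimal TOC / ELF)
       .quad foo             (64-bit minimal TOC / ELF)

   the exact directives depend on target and options; see output_toc
   below.  */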
30809 /* Hash functions for the hash table. */
30811 static unsigned
30812 rs6000_hash_constant (rtx k)
30814 enum rtx_code code = GET_CODE (k);
30815 machine_mode mode = GET_MODE (k);
30816 unsigned result = (code << 3) ^ mode;
30817 const char *format;
30818 int flen, fidx;
30820 format = GET_RTX_FORMAT (code);
30821 flen = strlen (format);
30822 fidx = 0;
30824 switch (code)
30826 case LABEL_REF:
30827 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
30829 case CONST_WIDE_INT:
30831 int i;
30832 flen = CONST_WIDE_INT_NUNITS (k);
30833 for (i = 0; i < flen; i++)
30834 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
30835 return result;
30838 case CONST_DOUBLE:
30839 if (mode != VOIDmode)
30840 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
30841 flen = 2;
30842 break;
30844 case CODE_LABEL:
30845 fidx = 3;
30846 break;
30848 default:
30849 break;
30852 for (; fidx < flen; fidx++)
30853 switch (format[fidx])
30855 case 's':
30857 unsigned i, len;
30858 const char *str = XSTR (k, fidx);
30859 len = strlen (str);
30860 result = result * 613 + len;
30861 for (i = 0; i < len; i++)
30862 result = result * 613 + (unsigned) str[i];
30863 break;
30865 case 'u':
30866 case 'e':
30867 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
30868 break;
30869 case 'i':
30870 case 'n':
30871 result = result * 613 + (unsigned) XINT (k, fidx);
30872 break;
30873 case 'w':
30874 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
30875 result = result * 613 + (unsigned) XWINT (k, fidx);
30876 else
30878 size_t i;
30879 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
30880 result = result * 613 + (unsigned) (XWINT (k, fidx)
30881 >> CHAR_BIT * i);
30883 break;
30884 case '0':
30885 break;
30886 default:
30887 gcc_unreachable ();
30890 return result;
30893 hashval_t
30894 toc_hasher::hash (toc_hash_struct *thc)
30896 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
30899 /* Compare H1 and H2 for equivalence. */
30901 bool
30902 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
30904 rtx r1 = h1->key;
30905 rtx r2 = h2->key;
30907 if (h1->key_mode != h2->key_mode)
30908 return 0;
30910 return rtx_equal_p (r1, r2);
30913 /* These are the names given by the C++ front-end to vtables, and
30914 vtable-like objects. Ideally, this logic should not be here;
30915 instead, there should be some programmatic way of inquiring as
30916 to whether or not an object is a vtable. */
30918 #define VTABLE_NAME_P(NAME) \
30919 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
30920 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
30921 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
30922 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
30923 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
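/* Note that the macro body tests the local variable `name', not its
   NAME argument; every current caller passes a variable actually
   spelled `name', so this works, but new call sites must do the
   same.  */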
30925 #ifdef NO_DOLLAR_IN_LABEL
30926 /* Return a GGC-allocated character string translating dollar signs in
30927 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
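/* For example (illustrative): "get$stub" becomes "get_stub" and
   "a$b$c" becomes "a_b_c", while a '$' in the first position is left
   alone (q == name below), so "$tmp" is returned unchanged.  */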
30929 const char *
30930 rs6000_xcoff_strip_dollar (const char *name)
30932 char *strip, *p;
30933 const char *q;
30934 size_t len;
30936 q = (const char *) strchr (name, '$');
30938 if (q == 0 || q == name)
30939 return name;
30941 len = strlen (name);
30942 strip = XALLOCAVEC (char, len + 1);
30943 strcpy (strip, name);
30944 p = strip + (q - name);
30945 while (p)
30947 *p = '_';
30948 p = strchr (p + 1, '$');
30951 return ggc_alloc_string (strip, len);
30953 #endif
30955 void
30956 rs6000_output_symbol_ref (FILE *file, rtx x)
30958 const char *name = XSTR (x, 0);
30960 /* Currently C++ TOC references to vtables can be emitted before it
30961 is decided whether the vtable is public or private. If this is
30962 the case, then the linker will eventually complain that there is
30963 a reference to an unknown section. Thus, for vtables only,
30964 we emit the TOC reference so that it references the identifier
30965 rather than the symbol. */
30966 if (VTABLE_NAME_P (name))
30968 RS6000_OUTPUT_BASENAME (file, name);
30970 else
30971 assemble_name (file, name);
30974 /* Output a TOC entry. We derive the entry name from what is being
30975 written. */
30977 void
30978 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
30980 char buf[256];
30981 const char *name = buf;
30982 rtx base = x;
30983 HOST_WIDE_INT offset = 0;
30985 gcc_assert (!TARGET_NO_TOC);
30987 /* When the linker won't eliminate them, don't output duplicate
30988 TOC entries (this happens on AIX if there is any kind of TOC,
30989 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
30990 CODE_LABELs. */
30991 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
30993 struct toc_hash_struct *h;
30995 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
30996 time because GGC is not initialized at that point. */
30997 if (toc_hash_table == NULL)
30998 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
31000 h = ggc_alloc<toc_hash_struct> ();
31001 h->key = x;
31002 h->key_mode = mode;
31003 h->labelno = labelno;
31005 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
31006 if (*found == NULL)
31007 *found = h;
31008 else /* This is indeed a duplicate.
31009 Set this label equal to that label. */
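/* E.g. (hypothetical label numbers): if entry 7 duplicates entry 3,
   this emits ".set LC7,LC3" (modulo the target's internal label
   prefix) so both labels name the same TOC slot.  */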
31011 fputs ("\t.set ", file);
31012 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
31013 fprintf (file, "%d,", labelno);
31014 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
31015 fprintf (file, "%d\n", ((*found)->labelno));
31017 #ifdef HAVE_AS_TLS
31018 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
31019 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
31020 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
31022 fputs ("\t.set ", file);
31023 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
31024 fprintf (file, "%d,", labelno);
31025 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
31026 fprintf (file, "%d\n", ((*found)->labelno));
31028 #endif
31029 return;
31033 /* If we're going to put a double constant in the TOC, make sure it's
31034 aligned properly when strict alignment is on. */
31035 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
31036 && STRICT_ALIGNMENT
31037 && GET_MODE_BITSIZE (mode) >= 64
31038 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
31039 ASM_OUTPUT_ALIGN (file, 3);
31042 (*targetm.asm_out.internal_label) (file, "LC", labelno);
31044 /* Handle FP constants specially. Note that if we have a minimal
31045 TOC, things we put here aren't actually in the TOC, so we can allow
31046 FP constants. */
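/* Worked example (values hedged, see the DFmode/DDmode branch below):
   DFmode 1.0 has the IEEE image 0x3ff0000000000000, so a 64-bit ELF
   target emits roughly ".quad 0x3ff0000000000000", while an AIX full
   TOC emits ".tc FD_3ff00000_0[TC],0x3ff0000000000000".  */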
31047 if (GET_CODE (x) == CONST_DOUBLE
31048 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
31049 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
31051 long k[4];
31053 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31054 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
31055 else
31056 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31058 if (TARGET_64BIT)
31060 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31061 fputs (DOUBLE_INT_ASM_OP, file);
31062 else
31063 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
31064 k[0] & 0xffffffff, k[1] & 0xffffffff,
31065 k[2] & 0xffffffff, k[3] & 0xffffffff);
31066 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
31067 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31068 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
31069 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
31070 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
31071 return;
31073 else
31075 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31076 fputs ("\t.long ", file);
31077 else
31078 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
31079 k[0] & 0xffffffff, k[1] & 0xffffffff,
31080 k[2] & 0xffffffff, k[3] & 0xffffffff);
31081 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
31082 k[0] & 0xffffffff, k[1] & 0xffffffff,
31083 k[2] & 0xffffffff, k[3] & 0xffffffff);
31084 return;
31087 else if (GET_CODE (x) == CONST_DOUBLE
31088 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
31090 long k[2];
31092 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31093 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
31094 else
31095 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31097 if (TARGET_64BIT)
31099 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31100 fputs (DOUBLE_INT_ASM_OP, file);
31101 else
31102 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31103 k[0] & 0xffffffff, k[1] & 0xffffffff);
31104 fprintf (file, "0x%lx%08lx\n",
31105 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31106 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
31107 return;
31109 else
31111 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31112 fputs ("\t.long ", file);
31113 else
31114 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31115 k[0] & 0xffffffff, k[1] & 0xffffffff);
31116 fprintf (file, "0x%lx,0x%lx\n",
31117 k[0] & 0xffffffff, k[1] & 0xffffffff);
31118 return;
31121 else if (GET_CODE (x) == CONST_DOUBLE
31122 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
31124 long l;
31126 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31127 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
31128 else
31129 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
31131 if (TARGET_64BIT)
31133 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31134 fputs (DOUBLE_INT_ASM_OP, file);
31135 else
31136 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31137 if (WORDS_BIG_ENDIAN)
31138 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
31139 else
31140 fprintf (file, "0x%lx\n", l & 0xffffffff);
31141 return;
31143 else
31145 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31146 fputs ("\t.long ", file);
31147 else
31148 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31149 fprintf (file, "0x%lx\n", l & 0xffffffff);
31150 return;
31153 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
31155 unsigned HOST_WIDE_INT low;
31156 HOST_WIDE_INT high;
31158 low = INTVAL (x) & 0xffffffff;
31159 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
31161 /* TOC entries are always Pmode-sized, so when big-endian
31162 smaller integer constants in the TOC need to be padded.
31163 (This is still a win over putting the constants in
31164 a separate constant pool, because then we'd have
31165 to have both a TOC entry _and_ the actual constant.)
31167 For a 32-bit target, CONST_INT values are loaded and shifted
31168 entirely within `low' and can be stored in one TOC entry. */
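/* Worked example of the padding below: an SImode 0x12345678 in a
   64-bit big-endian TOC is shifted left by 32, producing the entry
   0x1234567800000000, so a Pmode load of the slot leaves the value
   in the high half of the register as expected.  */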
31170 /* It would be easy to make this work, but it doesn't now. */
31171 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
31173 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
31175 low |= high << 32;
31176 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
31177 high = (HOST_WIDE_INT) low >> 32;
31178 low &= 0xffffffff;
31181 if (TARGET_64BIT)
31183 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31184 fputs (DOUBLE_INT_ASM_OP, file);
31185 else
31186 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31187 (long) high & 0xffffffff, (long) low & 0xffffffff);
31188 fprintf (file, "0x%lx%08lx\n",
31189 (long) high & 0xffffffff, (long) low & 0xffffffff);
31190 return;
31192 else
31194 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
31196 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31197 fputs ("\t.long ", file);
31198 else
31199 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31200 (long) high & 0xffffffff, (long) low & 0xffffffff);
31201 fprintf (file, "0x%lx,0x%lx\n",
31202 (long) high & 0xffffffff, (long) low & 0xffffffff);
31204 else
31206 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31207 fputs ("\t.long ", file);
31208 else
31209 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
31210 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
31212 return;
31216 if (GET_CODE (x) == CONST)
31218 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
31219 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
31221 base = XEXP (XEXP (x, 0), 0);
31222 offset = INTVAL (XEXP (XEXP (x, 0), 1));
31225 switch (GET_CODE (base))
31227 case SYMBOL_REF:
31228 name = XSTR (base, 0);
31229 break;
31231 case LABEL_REF:
31232 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
31233 CODE_LABEL_NUMBER (XEXP (base, 0)));
31234 break;
31236 case CODE_LABEL:
31237 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
31238 break;
31240 default:
31241 gcc_unreachable ();
31244 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31245 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
31246 else
31248 fputs ("\t.tc ", file);
31249 RS6000_OUTPUT_BASENAME (file, name);
31251 if (offset < 0)
31252 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
31253 else if (offset)
31254 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
31256 /* Mark large TOC symbols on AIX with [TE] so they are mapped
31257 after other TOC symbols, reducing overflow of small TOC access
31258 to [TC] symbols. */
31259 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
31260 ? "[TE]," : "[TC],", file);
31263 /* Currently C++ TOC references to vtables can be emitted before it
31264 is decided whether the vtable is public or private. If this is
31265 the case, then the linker will eventually complain that there is
31266 a TOC reference to an unknown section. Thus, for vtables only,
31267 we emit the TOC reference so that it references the symbol
31268 rather than the section. */
31269 if (VTABLE_NAME_P (name))
31271 RS6000_OUTPUT_BASENAME (file, name);
31272 if (offset < 0)
31273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
31274 else if (offset > 0)
31275 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
31277 else
31278 output_addr_const (file, x);
31280 #if HAVE_AS_TLS
31281 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
31283 switch (SYMBOL_REF_TLS_MODEL (base))
31285 case 0:
31286 break;
31287 case TLS_MODEL_LOCAL_EXEC:
31288 fputs ("@le", file);
31289 break;
31290 case TLS_MODEL_INITIAL_EXEC:
31291 fputs ("@ie", file);
31292 break;
31293 /* Use global-dynamic for local-dynamic. */
31294 case TLS_MODEL_GLOBAL_DYNAMIC:
31295 case TLS_MODEL_LOCAL_DYNAMIC:
31296 putc ('\n', file);
31297 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
31298 fputs ("\t.tc .", file);
31299 RS6000_OUTPUT_BASENAME (file, name);
31300 fputs ("[TC],", file);
31301 output_addr_const (file, x);
31302 fputs ("@m", file);
31303 break;
31304 default:
31305 gcc_unreachable ();
31308 #endif
31310 putc ('\n', file);
31313 /* Output an assembler pseudo-op to write an ASCII string of N characters
31314 starting at P to FILE.
31316 On the RS/6000, we have to do this using the .byte operation and
31317 write out special characters outside the quoted string.
31318 Also, the assembler is broken; very long strings are truncated,
31319 so we must artificially break them up early. */
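/* For example (illustrative), the three input bytes "Hi\n" come out as

       .byte "Hi"
       .byte 10

   printable characters go inside a quoted string (with '"' doubled),
   everything else is written as a decimal byte value.  */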
31321 void
31322 output_ascii (FILE *file, const char *p, int n)
31324 char c;
31325 int i, count_string;
31326 const char *for_string = "\t.byte \"";
31327 const char *for_decimal = "\t.byte ";
31328 const char *to_close = NULL;
31330 count_string = 0;
31331 for (i = 0; i < n; i++)
31333 c = *p++;
31334 if (c >= ' ' && c < 0177)
31336 if (for_string)
31337 fputs (for_string, file);
31338 putc (c, file);
31340 /* Write two quotes to get one. */
31341 if (c == '"')
31343 putc (c, file);
31344 ++count_string;
31347 for_string = NULL;
31348 for_decimal = "\"\n\t.byte ";
31349 to_close = "\"\n";
31350 ++count_string;
31352 if (count_string >= 512)
31354 fputs (to_close, file);
31356 for_string = "\t.byte \"";
31357 for_decimal = "\t.byte ";
31358 to_close = NULL;
31359 count_string = 0;
31362 else
31364 if (for_decimal)
31365 fputs (for_decimal, file);
31366 fprintf (file, "%d", c);
31368 for_string = "\n\t.byte \"";
31369 for_decimal = ", ";
31370 to_close = "\n";
31371 count_string = 0;
31375 /* Now close the string if we have written one. Then end the line. */
31376 if (to_close)
31377 fputs (to_close, file);
31380 /* Generate a unique section name for FILENAME for a section type
31381 represented by SECTION_DESC. Output goes into BUF.
31383 SECTION_DESC can be any string, as long as it is different for each
31384 possible section type.
31386 We name the section in the same manner as xlc. The name begins with an
31387 underscore followed by the filename (after stripping any leading directory
31388 names) with the last period replaced by the string SECTION_DESC. If
31389 FILENAME does not contain a period, SECTION_DESC is appended to the end of
31390 the name. */
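/* For example (hypothetical inputs): FILENAME "src/foo.c" with
   SECTION_DESC "data" yields "_foodata" -- the directory part is
   stripped, the last '.' and everything after it are replaced by
   SECTION_DESC, and non-alphanumeric characters are dropped.  */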
31392 void
31393 rs6000_gen_section_name (char **buf, const char *filename,
31394 const char *section_desc)
31396 const char *q, *after_last_slash, *last_period = 0;
31397 char *p;
31398 int len;
31400 after_last_slash = filename;
31401 for (q = filename; *q; q++)
31403 if (*q == '/')
31404 after_last_slash = q + 1;
31405 else if (*q == '.')
31406 last_period = q;
31409 len = strlen (after_last_slash) + strlen (section_desc) + 2;
31410 *buf = (char *) xmalloc (len);
31412 p = *buf;
31413 *p++ = '_';
31415 for (q = after_last_slash; *q; q++)
31417 if (q == last_period)
31419 strcpy (p, section_desc);
31420 p += strlen (section_desc);
31421 break;
31424 else if (ISALNUM (*q))
31425 *p++ = *q;
31428 if (last_period == 0)
31429 strcpy (p, section_desc);
31430 else
31431 *p = '\0';
31434 /* Emit profile function. */
31436 void
31437 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
31439 /* Non-standard profiling for kernels, which just saves LR then calls
31440 _mcount without worrying about arg saves. The idea is to change
31441 the function prologue as little as possible as it isn't easy to
31442 account for arg save/restore code added just for _mcount. */
31443 if (TARGET_PROFILE_KERNEL)
31444 return;
31446 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31448 #ifndef NO_PROFILE_COUNTERS
31449 # define NO_PROFILE_COUNTERS 0
31450 #endif
31451 if (NO_PROFILE_COUNTERS)
31452 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31453 LCT_NORMAL, VOIDmode, 0);
31454 else
31456 char buf[30];
31457 const char *label_name;
31458 rtx fun;
31460 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31461 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
31462 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
31464 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31465 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
31468 else if (DEFAULT_ABI == ABI_DARWIN)
31470 const char *mcount_name = RS6000_MCOUNT;
31471 int caller_addr_regno = LR_REGNO;
31473 /* Be conservative and always set this, at least for now. */
31474 crtl->uses_pic_offset_table = 1;
31476 #if TARGET_MACHO
31477 /* For PIC code, set up a stub and collect the caller's address
31478 from r0, which is where the prologue puts it. */
31479 if (MACHOPIC_INDIRECT
31480 && crtl->uses_pic_offset_table)
31481 caller_addr_regno = 0;
31482 #endif
31483 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
31484 LCT_NORMAL, VOIDmode, 1,
31485 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
31489 /* Write function profiler code. */
31491 void
31492 output_function_profiler (FILE *file, int labelno)
31494 char buf[100];
31496 switch (DEFAULT_ABI)
31498 default:
31499 gcc_unreachable ();
31501 case ABI_V4:
31502 if (!TARGET_32BIT)
31504 warning (0, "no profiling of 64-bit code for this ABI");
31505 return;
31507 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31508 fprintf (file, "\tmflr %s\n", reg_names[0]);
31509 if (NO_PROFILE_COUNTERS)
31511 asm_fprintf (file, "\tstw %s,4(%s)\n",
31512 reg_names[0], reg_names[1]);
31514 else if (TARGET_SECURE_PLT && flag_pic)
31516 if (TARGET_LINK_STACK)
31518 char name[32];
31519 get_ppc476_thunk_name (name);
31520 asm_fprintf (file, "\tbl %s\n", name);
31522 else
31523 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
31524 asm_fprintf (file, "\tstw %s,4(%s)\n",
31525 reg_names[0], reg_names[1]);
31526 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31527 asm_fprintf (file, "\taddis %s,%s,",
31528 reg_names[12], reg_names[12]);
31529 assemble_name (file, buf);
31530 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
31531 assemble_name (file, buf);
31532 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
31534 else if (flag_pic == 1)
31536 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
31537 asm_fprintf (file, "\tstw %s,4(%s)\n",
31538 reg_names[0], reg_names[1]);
31539 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31540 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
31541 assemble_name (file, buf);
31542 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
31544 else if (flag_pic > 1)
31546 asm_fprintf (file, "\tstw %s,4(%s)\n",
31547 reg_names[0], reg_names[1]);
31548 /* Now, we need to get the address of the label. */
31549 if (TARGET_LINK_STACK)
31551 char name[32];
31552 get_ppc476_thunk_name (name);
31553 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
31554 assemble_name (file, buf);
31555 fputs ("-.\n1:", file);
31556 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31557 asm_fprintf (file, "\taddi %s,%s,4\n",
31558 reg_names[11], reg_names[11]);
31560 else
31562 fputs ("\tbcl 20,31,1f\n\t.long ", file);
31563 assemble_name (file, buf);
31564 fputs ("-.\n1:", file);
31565 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31567 asm_fprintf (file, "\tlwz %s,0(%s)\n",
31568 reg_names[0], reg_names[11]);
31569 asm_fprintf (file, "\tadd %s,%s,%s\n",
31570 reg_names[0], reg_names[0], reg_names[11]);
31572 else
31574 asm_fprintf (file, "\tlis %s,", reg_names[12]);
31575 assemble_name (file, buf);
31576 fputs ("@ha\n", file);
31577 asm_fprintf (file, "\tstw %s,4(%s)\n",
31578 reg_names[0], reg_names[1]);
31579 asm_fprintf (file, "\tla %s,", reg_names[0]);
31580 assemble_name (file, buf);
31581 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
31584 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
31585 fprintf (file, "\tbl %s%s\n",
31586 RS6000_MCOUNT, flag_pic ? "@plt" : "");
31587 break;
31589 case ABI_AIX:
31590 case ABI_ELFv2:
31591 case ABI_DARWIN:
31592 /* Don't do anything, done in output_profile_hook (). */
31593 break;
31599 /* The following variable value is the last issued insn. */
31601 static rtx_insn *last_scheduled_insn;
31603 /* The following variable helps to balance issuing of load and
31604 store instructions. */
31606 static int load_store_pendulum;
31608 /* The following variable helps pair divide insns during scheduling. */
31609 static int divide_cnt;
31610 /* The following variable helps pair and alternate vector and vector load
31611 insns during scheduling. */
31612 static int vec_load_pendulum;
31615 /* Power4 load update and store update instructions are cracked into a
31616 load or store and an integer insn which are executed in the same cycle.
31617 Branches have their own dispatch slot which does not count against the
31618 GCC issue rate, but it changes the program flow so there are no other
31619 instructions to issue in this cycle. */
31621 static int
31622 rs6000_variable_issue_1 (rtx_insn *insn, int more)
31624 last_scheduled_insn = insn;
31625 if (GET_CODE (PATTERN (insn)) == USE
31626 || GET_CODE (PATTERN (insn)) == CLOBBER)
31628 cached_can_issue_more = more;
31629 return cached_can_issue_more;
31632 if (insn_terminates_group_p (insn, current_group))
31634 cached_can_issue_more = 0;
31635 return cached_can_issue_more;
31638 /* An unrecognized insn has no reservation; don't charge it an issue slot. */
31639 if (recog_memoized (insn) < 0)
31640 return more;
31642 if (rs6000_sched_groups)
31644 if (is_microcoded_insn (insn))
31645 cached_can_issue_more = 0;
31646 else if (is_cracked_insn (insn))
31647 cached_can_issue_more = more > 2 ? more - 2 : 0;
31648 else
31649 cached_can_issue_more = more - 1;
31651 return cached_can_issue_more;
31654 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
31655 return 0;
31657 cached_can_issue_more = more - 1;
31658 return cached_can_issue_more;
31661 static int
31662 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
31664 int r = rs6000_variable_issue_1 (insn, more);
31665 if (verbose)
31666 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
31667 return r;
31670 /* Adjust the cost of a scheduling dependency. Return the new cost of
31671 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
31673 static int
31674 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
31675 unsigned int)
31677 enum attr_type attr_type;
31679 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
31680 return cost;
31682 switch (dep_type)
31684 case REG_DEP_TRUE:
31686 /* Data dependency; DEP_INSN writes a register that INSN reads
31687 some cycles later. */
31689 /* Separate a load from a narrower, dependent store. */
31690 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
31691 && GET_CODE (PATTERN (insn)) == SET
31692 && GET_CODE (PATTERN (dep_insn)) == SET
31693 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
31694 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
31695 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
31696 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
31697 return cost + 14;
31699 attr_type = get_attr_type (insn);
31701 switch (attr_type)
31703 case TYPE_JMPREG:
31704 /* Tell the first scheduling pass about the latency between
31705 a mtctr and bctr (and mtlr and br/blr). The first
31706 scheduling pass will not know about this latency since
31707 the mtctr instruction, which has the latency associated
31708 to it, will be generated by reload. */
31709 return 4;
31710 case TYPE_BRANCH:
31711 /* Leave some extra cycles between a compare and its
31712 dependent branch, to inhibit expensive mispredicts. */
31713 if ((rs6000_cpu_attr == CPU_PPC603
31714 || rs6000_cpu_attr == CPU_PPC604
31715 || rs6000_cpu_attr == CPU_PPC604E
31716 || rs6000_cpu_attr == CPU_PPC620
31717 || rs6000_cpu_attr == CPU_PPC630
31718 || rs6000_cpu_attr == CPU_PPC750
31719 || rs6000_cpu_attr == CPU_PPC7400
31720 || rs6000_cpu_attr == CPU_PPC7450
31721 || rs6000_cpu_attr == CPU_PPCE5500
31722 || rs6000_cpu_attr == CPU_PPCE6500
31723 || rs6000_cpu_attr == CPU_POWER4
31724 || rs6000_cpu_attr == CPU_POWER5
31725 || rs6000_cpu_attr == CPU_POWER7
31726 || rs6000_cpu_attr == CPU_POWER8
31727 || rs6000_cpu_attr == CPU_POWER9
31728 || rs6000_cpu_attr == CPU_CELL)
31729 && recog_memoized (dep_insn)
31730 && (INSN_CODE (dep_insn) >= 0))
31732 switch (get_attr_type (dep_insn))
31734 case TYPE_CMP:
31735 case TYPE_FPCOMPARE:
31736 case TYPE_CR_LOGICAL:
31737 case TYPE_DELAYED_CR:
31738 return cost + 2;
31739 case TYPE_EXTS:
31740 case TYPE_MUL:
31741 if (get_attr_dot (dep_insn) == DOT_YES)
31742 return cost + 2;
31743 else
31744 break;
31745 case TYPE_SHIFT:
31746 if (get_attr_dot (dep_insn) == DOT_YES
31747 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
31748 return cost + 2;
31749 else
31750 break;
31751 default:
31752 break;
31754 break;
31756 case TYPE_STORE:
31757 case TYPE_FPSTORE:
31758 if ((rs6000_cpu == PROCESSOR_POWER6)
31759 && recog_memoized (dep_insn)
31760 && (INSN_CODE (dep_insn) >= 0))
31763 if (GET_CODE (PATTERN (insn)) != SET)
31764 /* If this happens, we have to extend this to schedule
31765 optimally. Return default for now. */
31766 return cost;
31768 /* Adjust the cost for the case where the value written
31769 by a fixed point operation is used as the address
31770 gen value on a store. */
31771 switch (get_attr_type (dep_insn))
31773 case TYPE_LOAD:
31774 case TYPE_CNTLZ:
31776 if (! store_data_bypass_p (dep_insn, insn))
31777 return get_attr_sign_extend (dep_insn)
31778 == SIGN_EXTEND_YES ? 6 : 4;
31779 break;
31781 case TYPE_SHIFT:
31783 if (! store_data_bypass_p (dep_insn, insn))
31784 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31785 6 : 3;
31786 break;
31788 case TYPE_INTEGER:
31789 case TYPE_ADD:
31790 case TYPE_LOGICAL:
31791 case TYPE_EXTS:
31792 case TYPE_INSERT:
31794 if (! store_data_bypass_p (dep_insn, insn))
31795 return 3;
31796 break;
31798 case TYPE_STORE:
31799 case TYPE_FPLOAD:
31800 case TYPE_FPSTORE:
31802 if (get_attr_update (dep_insn) == UPDATE_YES
31803 && ! store_data_bypass_p (dep_insn, insn))
31804 return 3;
31805 break;
31807 case TYPE_MUL:
31809 if (! store_data_bypass_p (dep_insn, insn))
31810 return 17;
31811 break;
31813 case TYPE_DIV:
31815 if (! store_data_bypass_p (dep_insn, insn))
31816 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31817 break;
31819 default:
31820 break;
31823 break;
31825 case TYPE_LOAD:
31826 if ((rs6000_cpu == PROCESSOR_POWER6)
31827 && recog_memoized (dep_insn)
31828 && (INSN_CODE (dep_insn) >= 0))
31831 /* Adjust the cost for the case where the value written
31832 by a fixed point instruction is used within the address
31833 gen portion of a subsequent load(u)(x) */
31834 switch (get_attr_type (dep_insn))
31836 case TYPE_LOAD:
31837 case TYPE_CNTLZ:
31839 if (set_to_load_agen (dep_insn, insn))
31840 return get_attr_sign_extend (dep_insn)
31841 == SIGN_EXTEND_YES ? 6 : 4;
31842 break;
31844 case TYPE_SHIFT:
31846 if (set_to_load_agen (dep_insn, insn))
31847 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31848 6 : 3;
31849 break;
31851 case TYPE_INTEGER:
31852 case TYPE_ADD:
31853 case TYPE_LOGICAL:
31854 case TYPE_EXTS:
31855 case TYPE_INSERT:
31857 if (set_to_load_agen (dep_insn, insn))
31858 return 3;
31859 break;
31861 case TYPE_STORE:
31862 case TYPE_FPLOAD:
31863 case TYPE_FPSTORE:
31865 if (get_attr_update (dep_insn) == UPDATE_YES
31866 && set_to_load_agen (dep_insn, insn))
31867 return 3;
31868 break;
31870 case TYPE_MUL:
31872 if (set_to_load_agen (dep_insn, insn))
31873 return 17;
31874 break;
31876 case TYPE_DIV:
31878 if (set_to_load_agen (dep_insn, insn))
31879 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31880 break;
31882 default:
31883 break;
31886 break;
31888 case TYPE_FPLOAD:
31889 if ((rs6000_cpu == PROCESSOR_POWER6)
31890 && get_attr_update (insn) == UPDATE_NO
31891 && recog_memoized (dep_insn)
31892 && (INSN_CODE (dep_insn) >= 0)
31893 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
31894 return 2;
31896 default:
31897 break;
31900 /* Fall out to return default cost. */
31902 break;
31904 case REG_DEP_OUTPUT:
31905 /* Output dependency; DEP_INSN writes a register that INSN writes some
31906 cycles later. */
31907 if ((rs6000_cpu == PROCESSOR_POWER6)
31908 && recog_memoized (dep_insn)
31909 && (INSN_CODE (dep_insn) >= 0))
31911 attr_type = get_attr_type (insn);
31913 switch (attr_type)
31915 case TYPE_FP:
31916 case TYPE_FPSIMPLE:
31917 if (get_attr_type (dep_insn) == TYPE_FP
31918 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
31919 return 1;
31920 break;
31921 case TYPE_FPLOAD:
31922 if (get_attr_update (insn) == UPDATE_NO
31923 && get_attr_type (dep_insn) == TYPE_MFFGPR)
31924 return 2;
31925 break;
31926 default:
31927 break;
31930 /* Fall through, no cost for output dependency. */
31931 /* FALLTHRU */
31933 case REG_DEP_ANTI:
31934 /* Anti dependency; DEP_INSN reads a register that INSN writes some
31935 cycles later. */
31936 return 0;
31938 default:
31939 gcc_unreachable ();
31942 return cost;
31945 /* Debug version of rs6000_adjust_cost. */
31947 static int
31948 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
31949 int cost, unsigned int dw)
31951 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
31953 if (ret != cost)
31955 const char *dep;
31957 switch (dep_type)
31959 default: dep = "unknown dependency"; break;
31960 case REG_DEP_TRUE: dep = "data dependency"; break;
31961 case REG_DEP_OUTPUT: dep = "output dependency"; break;
31962 case REG_DEP_ANTI: dep = "anti dependency"; break;
31965 fprintf (stderr,
31966 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
31967 "%s, insn:\n", ret, cost, dep);
31969 debug_rtx (insn);
31972 return ret;
31975 /* Return true if INSN is microcoded, false otherwise. */
31978 static bool
31979 is_microcoded_insn (rtx_insn *insn)
31981 if (!insn || !NONDEBUG_INSN_P (insn)
31982 || GET_CODE (PATTERN (insn)) == USE
31983 || GET_CODE (PATTERN (insn)) == CLOBBER)
31984 return false;
31986 if (rs6000_cpu_attr == CPU_CELL)
31987 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
31989 if (rs6000_sched_groups
31990 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31992 enum attr_type type = get_attr_type (insn);
31993 if ((type == TYPE_LOAD
31994 && get_attr_update (insn) == UPDATE_YES
31995 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
31996 || ((type == TYPE_LOAD || type == TYPE_STORE)
31997 && get_attr_update (insn) == UPDATE_YES
31998 && get_attr_indexed (insn) == INDEXED_YES)
31999 || type == TYPE_MFCR)
32000 return true;
32003 return false;
32006 /* The function returns true if INSN is cracked into 2 instructions
32007 by the processor (and therefore occupies 2 issue slots). */
32009 static bool
32010 is_cracked_insn (rtx_insn *insn)
32012 if (!insn || !NONDEBUG_INSN_P (insn)
32013 || GET_CODE (PATTERN (insn)) == USE
32014 || GET_CODE (PATTERN (insn)) == CLOBBER)
32015 return false;
32017 if (rs6000_sched_groups
32018 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
32020 enum attr_type type = get_attr_type (insn);
32021 if ((type == TYPE_LOAD
32022 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32023 && get_attr_update (insn) == UPDATE_NO)
32024 || (type == TYPE_LOAD
32025 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
32026 && get_attr_update (insn) == UPDATE_YES
32027 && get_attr_indexed (insn) == INDEXED_NO)
32028 || (type == TYPE_STORE
32029 && get_attr_update (insn) == UPDATE_YES
32030 && get_attr_indexed (insn) == INDEXED_NO)
32031 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
32032 && get_attr_update (insn) == UPDATE_YES)
32033 || type == TYPE_DELAYED_CR
32034 || (type == TYPE_EXTS
32035 && get_attr_dot (insn) == DOT_YES)
32036 || (type == TYPE_SHIFT
32037 && get_attr_dot (insn) == DOT_YES
32038 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
32039 || (type == TYPE_MUL
32040 && get_attr_dot (insn) == DOT_YES)
32041 || type == TYPE_DIV
32042 || (type == TYPE_INSERT
32043 && get_attr_size (insn) == SIZE_32))
32044 return true;
32047 return false;
32050 /* The function returns true if INSN can be issued only from
32051 the branch slot. */
32053 static bool
32054 is_branch_slot_insn (rtx_insn *insn)
32056 if (!insn || !NONDEBUG_INSN_P (insn)
32057 || GET_CODE (PATTERN (insn)) == USE
32058 || GET_CODE (PATTERN (insn)) == CLOBBER)
32059 return false;
32061 if (rs6000_sched_groups)
32063 enum attr_type type = get_attr_type (insn);
32064 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
32065 return true;
32066 return false;
32069 return false;
32072 /* Return true if OUT_INSN sets a value that is used in the
32073 address generation computation of IN_INSN. */
32074 static bool
32075 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
32077 rtx out_set, in_set;
32079 /* For performance reasons, only handle the simple case where
32080 both loads are a single_set. */
32081 out_set = single_set (out_insn);
32082 if (out_set)
32084 in_set = single_set (in_insn);
32085 if (in_set)
32086 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
32089 return false;
32092 /* Try to determine base/offset/size parts of the given MEM.
32093 Return true if successful, false if all the values couldn't
32094 be determined.
32096 This function only looks for REG or REG+CONST address forms.
32097 REG+REG address form will return false. */
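/* For example (illustrative rtl, size taken from the MEM when known):
   (mem:DI (plus (plus (reg 9) (const_int 16)) (const_int 8))) yields
   *BASE = (reg 9), *OFFSET = 24, *SIZE = 8, while
   (mem:DI (plus (reg 9) (reg 10))) returns false because stripping
   the constant terms does not end at a lone REG.  */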
32099 static bool
32100 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
32101 HOST_WIDE_INT *size)
32103 rtx addr_rtx;
32104 if (MEM_SIZE_KNOWN_P (mem))
32105 *size = MEM_SIZE (mem);
32106 else
32107 return false;
32109 addr_rtx = (XEXP (mem, 0));
32110 if (GET_CODE (addr_rtx) == PRE_MODIFY)
32111 addr_rtx = XEXP (addr_rtx, 1);
32113 *offset = 0;
32114 while (GET_CODE (addr_rtx) == PLUS
32115 && CONST_INT_P (XEXP (addr_rtx, 1)))
32117 *offset += INTVAL (XEXP (addr_rtx, 1));
32118 addr_rtx = XEXP (addr_rtx, 0);
32120 if (!REG_P (addr_rtx))
32121 return false;
32123 *base = addr_rtx;
32124 return true;
32127 /* Return true if the target storage location of MEM1 is adjacent
32128 to the target storage location of MEM2. */
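/* E.g. two 4-byte accesses at r9+0 and r9+4 are adjacent in either
   order; r9+0 and r9+8 are not, and accesses off different base
   registers are never considered adjacent, whatever the offsets.  */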
32131 static bool
32132 adjacent_mem_locations (rtx mem1, rtx mem2)
32134 rtx reg1, reg2;
32135 HOST_WIDE_INT off1, size1, off2, size2;
32137 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32138 && get_memref_parts (mem2, &reg2, &off2, &size2))
32139 return ((REGNO (reg1) == REGNO (reg2))
32140 && ((off1 + size1 == off2)
32141 || (off2 + size2 == off1)));
32143 return false;
32146 /* This function returns true if it can be determined that the two MEM
32147 locations overlap by at least 1 byte based on base reg/offset/size. */
32149 static bool
32150 mem_locations_overlap (rtx mem1, rtx mem2)
32152 rtx reg1, reg2;
32153 HOST_WIDE_INT off1, size1, off2, size2;
32155 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32156 && get_memref_parts (mem2, &reg2, &off2, &size2))
32157 return ((REGNO (reg1) == REGNO (reg2))
32158 && (((off1 <= off2) && (off1 + size1 > off2))
32159 || ((off2 <= off1) && (off2 + size2 > off1))));
32161 return false;
32164 /* A C statement (sans semicolon) to update the integer scheduling
32165 priority INSN_PRIORITY (INSN). Increase the priority to execute the
32166 INSN earlier, reduce the priority to execute INSN later. Do not
32167 define this macro if you do not need to adjust the scheduling
32168 priorities of insns. */
32170 static int
32171 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
32173 rtx load_mem, str_mem;
32174 /* On machines (like the 750) which have asymmetric integer units,
32175 where one integer unit can do multiply and divides and the other
32176 can't, reduce the priority of multiply/divide so it is scheduled
32177 before other integer operations. */
32179 #if 0
32180 if (! INSN_P (insn))
32181 return priority;
32183 if (GET_CODE (PATTERN (insn)) == USE)
32184 return priority;
32186 switch (rs6000_cpu_attr) {
32187 case CPU_PPC750:
32188 switch (get_attr_type (insn))
32190 default:
32191 break;
32193 case TYPE_MUL:
32194 case TYPE_DIV:
32195 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
32196 priority, priority);
32197 if (priority >= 0 && priority < 0x01000000)
32198 priority >>= 3;
32199 break;
32202 #endif
32204 if (insn_must_be_first_in_group (insn)
32205 && reload_completed
32206 && current_sched_info->sched_max_insns_priority
32207 && rs6000_sched_restricted_insns_priority)
32210 /* Prioritize insns that can be dispatched only in the first
32211 dispatch slot. */
32212 if (rs6000_sched_restricted_insns_priority == 1)
32213 /* Attach highest priority to insn. This means that in
32214 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
32215 precede 'priority' (critical path) considerations. */
32216 return current_sched_info->sched_max_insns_priority;
32217 else if (rs6000_sched_restricted_insns_priority == 2)
32218 /* Increase priority of insn by a minimal amount. This means that in
32219 haifa-sched.c:ready_sort(), only 'priority' (critical path)
32220 considerations precede dispatch-slot restriction considerations. */
32221 return (priority + 1);
32224 if (rs6000_cpu == PROCESSOR_POWER6
32225 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
32226 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
32227 /* Attach highest priority to insn if the scheduler has just issued two
32228 stores and this instruction is a load, or two loads and this instruction
32229 is a store. Power6 wants loads and stores scheduled alternately
32230 when possible. */
32231 return current_sched_info->sched_max_insns_priority;
32233 return priority;
32236 /* Return true if the instruction is nonpipelined on the Cell. */
32237 static bool
32238 is_nonpipeline_insn (rtx_insn *insn)
32240 enum attr_type type;
32241 if (!insn || !NONDEBUG_INSN_P (insn)
32242 || GET_CODE (PATTERN (insn)) == USE
32243 || GET_CODE (PATTERN (insn)) == CLOBBER)
32244 return false;
32246 type = get_attr_type (insn);
32247 if (type == TYPE_MUL
32248 || type == TYPE_DIV
32249 || type == TYPE_SDIV
32250 || type == TYPE_DDIV
32251 || type == TYPE_SSQRT
32252 || type == TYPE_DSQRT
32253 || type == TYPE_MFCR
32254 || type == TYPE_MFCRF
32255 || type == TYPE_MFJMPR)
32257 return true;
32259 return false;
32263 /* Return how many instructions the machine can issue per cycle. */
32265 static int
32266 rs6000_issue_rate (void)
32268 /* Unless scheduling for register pressure, use issue rate of 1 for
32269 first scheduling pass to decrease degradation. */
32270 if (!reload_completed && !flag_sched_pressure)
32271 return 1;
32273 switch (rs6000_cpu_attr) {
32274 case CPU_RS64A:
32275 case CPU_PPC601: /* ? */
32276 case CPU_PPC7450:
32277 return 3;
32278 case CPU_PPC440:
32279 case CPU_PPC603:
32280 case CPU_PPC750:
32281 case CPU_PPC7400:
32282 case CPU_PPC8540:
32283 case CPU_PPC8548:
32284 case CPU_CELL:
32285 case CPU_PPCE300C2:
32286 case CPU_PPCE300C3:
32287 case CPU_PPCE500MC:
32288 case CPU_PPCE500MC64:
32289 case CPU_PPCE5500:
32290 case CPU_PPCE6500:
32291 case CPU_TITAN:
32292 return 2;
32293 case CPU_PPC476:
32294 case CPU_PPC604:
32295 case CPU_PPC604E:
32296 case CPU_PPC620:
32297 case CPU_PPC630:
32298 return 4;
32299 case CPU_POWER4:
32300 case CPU_POWER5:
32301 case CPU_POWER6:
32302 case CPU_POWER7:
32303 return 5;
32304 case CPU_POWER8:
32305 return 7;
32306 case CPU_POWER9:
32307 return 6;
32308 default:
32309 return 1;
32313 /* Return how many instructions to look ahead for better insn
32314 scheduling. */
32316 static int
32317 rs6000_use_sched_lookahead (void)
32319 switch (rs6000_cpu_attr)
32321 case CPU_PPC8540:
32322 case CPU_PPC8548:
32323 return 4;
32325 case CPU_CELL:
32326 return (reload_completed ? 8 : 0);
32328 default:
32329 return 0;
32333 /* We are choosing insn from the ready queue. Return zero if INSN can be
32334 chosen. */
32335 static int
32336 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
32338 if (ready_index == 0)
32339 return 0;
32341 if (rs6000_cpu_attr != CPU_CELL)
32342 return 0;
32344 gcc_assert (insn != NULL_RTX && INSN_P (insn));
32346 if (!reload_completed
32347 || is_nonpipeline_insn (insn)
32348 || is_microcoded_insn (insn))
32349 return 1;
32351 return 0;
32354 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
32355 and return true. */
32357 static bool
32358 find_mem_ref (rtx pat, rtx *mem_ref)
32360 const char * fmt;
32361 int i, j;
32363 /* stack_tie does not produce any real memory traffic. */
32364 if (tie_operand (pat, VOIDmode))
32365 return false;
32367 if (GET_CODE (pat) == MEM)
32369 *mem_ref = pat;
32370 return true;
32373 /* Recursively process the pattern. */
32374 fmt = GET_RTX_FORMAT (GET_CODE (pat));
32376 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
32378 if (fmt[i] == 'e')
32380 if (find_mem_ref (XEXP (pat, i), mem_ref))
32381 return true;
32383 else if (fmt[i] == 'E')
32384 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
32386 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
32387 return true;
32391 return false;
32394 /* Determine if PAT is a PATTERN of a load insn. */
32396 static bool
32397 is_load_insn1 (rtx pat, rtx *load_mem)
32399 if (!pat)
32400 return false;
32402 if (GET_CODE (pat) == SET)
32403 return find_mem_ref (SET_SRC (pat), load_mem);
32405 if (GET_CODE (pat) == PARALLEL)
32407 int i;
32409 for (i = 0; i < XVECLEN (pat, 0); i++)
32410 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
32411 return true;
32414 return false;
32417 /* Determine if INSN loads from memory. */
32419 static bool
32420 is_load_insn (rtx insn, rtx *load_mem)
32422 if (!insn || !INSN_P (insn))
32423 return false;
32425 if (CALL_P (insn))
32426 return false;
32428 return is_load_insn1 (PATTERN (insn), load_mem);
32431 /* Determine if PAT is a PATTERN of a store insn. */
32433 static bool
32434 is_store_insn1 (rtx pat, rtx *str_mem)
32436 if (!pat)
32437 return false;
32439 if (GET_CODE (pat) == SET)
32440 return find_mem_ref (SET_DEST (pat), str_mem);
32442 if (GET_CODE (pat) == PARALLEL)
32444 int i;
32446 for (i = 0; i < XVECLEN (pat, 0); i++)
32447 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
32448 return true;
32451 return false;
32454 /* Determine if INSN stores to memory. */
32456 static bool
32457 is_store_insn (rtx insn, rtx *str_mem)
32459 if (!insn || !INSN_P (insn))
32460 return false;
32462 return is_store_insn1 (PATTERN (insn), str_mem);
32465 /* Return whether TYPE is a Power9 pairable vector instruction type. */
32467 static bool
32468 is_power9_pairable_vec_type (enum attr_type type)
32470 switch (type)
32472 case TYPE_VECSIMPLE:
32473 case TYPE_VECCOMPLEX:
32474 case TYPE_VECDIV:
32475 case TYPE_VECCMP:
32476 case TYPE_VECPERM:
32477 case TYPE_VECFLOAT:
32478 case TYPE_VECFDIV:
32479 case TYPE_VECDOUBLE:
32480 return true;
32481 default:
32482 break;
32484 return false;
32487 /* Returns whether the dependence between INSN and NEXT is considered
32488 costly by the given target. */
32490 static bool
32491 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
32493 rtx insn;
32494 rtx next;
32495 rtx load_mem, str_mem;
32497 /* If the flag is not enabled - no dependence is considered costly;
32498 allow all dependent insns in the same group.
32499 This is the most aggressive option. */
32500 if (rs6000_sched_costly_dep == no_dep_costly)
32501 return false;
32503 /* If the flag is set to 1 - a dependence is always considered costly;
32504 do not allow dependent instructions in the same group.
32505 This is the most conservative option. */
32506 if (rs6000_sched_costly_dep == all_deps_costly)
32507 return true;
32509 insn = DEP_PRO (dep);
32510 next = DEP_CON (dep);
32512 if (rs6000_sched_costly_dep == store_to_load_dep_costly
32513 && is_load_insn (next, &load_mem)
32514 && is_store_insn (insn, &str_mem))
32515 /* Prevent load after store in the same group. */
32516 return true;
32518 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
32519 && is_load_insn (next, &load_mem)
32520 && is_store_insn (insn, &str_mem)
32521 && DEP_TYPE (dep) == REG_DEP_TRUE
32522 && mem_locations_overlap(str_mem, load_mem))
32523 /* Prevent load after store in the same group if it is a true
32524 dependence. */
32525 return true;
32527 /* The flag is set to X; dependences with latency >= X are considered costly,
32528 and will not be scheduled in the same group. */
32529 if (rs6000_sched_costly_dep <= max_dep_latency
32530 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
32531 return true;
32533 return false;
32536 /* Return the next insn after INSN that is found before TAIL is reached,
32537 skipping any "non-active" insns - insns that will not actually occupy
32538 an issue slot. Return NULL_RTX if such an insn is not found. */
32540 static rtx_insn *
32541 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
32543 if (insn == NULL_RTX || insn == tail)
32544 return NULL;
32546 while (1)
32548 insn = NEXT_INSN (insn);
32549 if (insn == NULL_RTX || insn == tail)
32550 return NULL;
32552 if (CALL_P (insn)
32553 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
32554 || (NONJUMP_INSN_P (insn)
32555 && GET_CODE (PATTERN (insn)) != USE
32556 && GET_CODE (PATTERN (insn)) != CLOBBER
32557 && INSN_CODE (insn) != CODE_FOR_stack_tie))
32558 break;
32560 return insn;
32563 /* Do Power9 specific sched_reorder2 reordering of ready list. */
32565 static int
32566 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
32568 int pos;
32569 int i;
32570 rtx_insn *tmp;
32571 enum attr_type type;
32573 type = get_attr_type (last_scheduled_insn);
32575 /* Try to issue fixed point divides back-to-back in pairs so they will be
32576 routed to separate execution units and execute in parallel. */
32577 if (type == TYPE_DIV && divide_cnt == 0)
32579 /* First divide has been scheduled. */
32580 divide_cnt = 1;
32582 /* Scan the ready list looking for another divide, if found move it
32583 to the end of the list so it is chosen next. */
32584 pos = lastpos;
32585 while (pos >= 0)
32587 if (recog_memoized (ready[pos]) >= 0
32588 && get_attr_type (ready[pos]) == TYPE_DIV)
32590 tmp = ready[pos];
32591 for (i = pos; i < lastpos; i++)
32592 ready[i] = ready[i + 1];
32593 ready[lastpos] = tmp;
32594 break;
32596 pos--;
32599 else
32601 /* Last insn was the 2nd divide or not a divide, reset the counter. */
32602 divide_cnt = 0;
32604 /* Power9 can execute 2 vector operations and 2 vector loads in a single
32605 cycle. So try to pair up and alternate groups of vector and vector
32606 load instructions.
32608 To aid this formation, a counter is maintained to keep track of
32609 vec/vecload insns issued. The value of vec_load_pendulum maintains
32610 the current state with the following values:
32612 0 : Initial state, no vec/vecload group has been started.
32614 -1 : 1 vector load has been issued and another has been found on
32615 the ready list and moved to the end.
32617 -2 : 2 vector loads have been issued and a vector operation has
32618 been found and moved to the end of the ready list.
32620 -3 : 2 vector loads and a vector insn have been issued and a
32621 vector operation has been found and moved to the end of the
32622 ready list.
32624 1 : 1 vector insn has been issued and another has been found and
32625 moved to the end of the ready list.
32627 2 : 2 vector insns have been issued and a vector load has been
32628 found and moved to the end of the ready list.
32630 3 : 2 vector insns and a vector load have been issued and another
32631 vector load has been found and moved to the end of the ready
32632 list. */
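/* So a hypothetical best-case sequence is vecload, vecload, vec op,
   vec op: the pendulum moves 0 -> -1 -> -2 -> -3 and is then reset
   to 0, matching the two-load/two-op per cycle pairing described
   above.  */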
32633 if (type == TYPE_VECLOAD)
32635 /* Issued a vecload. */
32636 if (vec_load_pendulum == 0)
32638 /* We issued a single vecload, look for another and move it to
32639 the end of the ready list so it will be scheduled next.
32640 Set pendulum if found. */
32641 pos = lastpos;
32642 while (pos >= 0)
32644 if (recog_memoized (ready[pos]) >= 0
32645 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32647 tmp = ready[pos];
32648 for (i = pos; i < lastpos; i++)
32649 ready[i] = ready[i + 1];
32650 ready[lastpos] = tmp;
32651 vec_load_pendulum = -1;
32652 return cached_can_issue_more;
32654 pos--;
32657 else if (vec_load_pendulum == -1)
32659 /* This is the second vecload we've issued, search the ready
32660 list for a vector operation so we can try to schedule a
32661 pair of those next. If found move to the end of the ready
32662 list so it is scheduled next and set the pendulum. */
32663 pos = lastpos;
32664 while (pos >= 0)
32666 if (recog_memoized (ready[pos]) >= 0
32667 && is_power9_pairable_vec_type (
32668 get_attr_type (ready[pos])))
32670 tmp = ready[pos];
32671 for (i = pos; i < lastpos; i++)
32672 ready[i] = ready[i + 1];
32673 ready[lastpos] = tmp;
32674 vec_load_pendulum = -2;
32675 return cached_can_issue_more;
32677 pos--;
32680 else if (vec_load_pendulum == 2)
32682 /* Two vector ops have been issued and we've just issued a
32683 vecload, look for another vecload and move to end of ready
32684 list if found. */
32685 pos = lastpos;
32686 while (pos >= 0)
32688 if (recog_memoized (ready[pos]) >= 0
32689 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32691 tmp = ready[pos];
32692 for (i = pos; i < lastpos; i++)
32693 ready[i] = ready[i + 1];
32694 ready[lastpos] = tmp;
32695 /* Set pendulum so that next vecload will be seen as
32696 finishing a group, not start of one. */
32697 vec_load_pendulum = 3;
32698 return cached_can_issue_more;
32700 pos--;
32704 else if (is_power9_pairable_vec_type (type))
32706 /* Issued a vector operation. */
32707 if (vec_load_pendulum == 0)
32708 /* We issued a single vec op, look for another and move it
32709 to the end of the ready list so it will be scheduled next.
32710 Set pendulum if found. */
32712 pos = lastpos;
32713 while (pos >= 0)
32715 if (recog_memoized (ready[pos]) >= 0
32716 && is_power9_pairable_vec_type (
32717 get_attr_type (ready[pos])))
32719 tmp = ready[pos];
32720 for (i = pos; i < lastpos; i++)
32721 ready[i] = ready[i + 1];
32722 ready[lastpos] = tmp;
32723 vec_load_pendulum = 1;
32724 return cached_can_issue_more;
32726 pos--;
32729 else if (vec_load_pendulum == 1)
32731 /* This is the second vec op we've issued, search the ready
32732 list for a vecload operation so we can try to schedule a
32733 pair of those next. If found move to the end of the ready
32734 list so it is scheduled next and set the pendulum. */
32735 pos = lastpos;
32736 while (pos >= 0)
32738 if (recog_memoized (ready[pos]) >= 0
32739 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32741 tmp = ready[pos];
32742 for (i = pos; i < lastpos; i++)
32743 ready[i] = ready[i + 1];
32744 ready[lastpos] = tmp;
32745 vec_load_pendulum = 2;
32746 return cached_can_issue_more;
32748 pos--;
32751 else if (vec_load_pendulum == -2)
32753 /* Two vecload ops have been issued and we've just issued a
32754 vec op, look for another vec op and move to end of ready
32755 list if found. */
32756 pos = lastpos;
32757 while (pos >= 0)
32759 if (recog_memoized (ready[pos]) >= 0
32760 && is_power9_pairable_vec_type (
32761 get_attr_type (ready[pos])))
32763 tmp = ready[pos];
32764 for (i = pos; i < lastpos; i++)
32765 ready[i] = ready[i + 1];
32766 ready[lastpos] = tmp;
32767 /* Set pendulum so that next vec op will be seen as
32768 finishing a group, not start of one. */
32769 vec_load_pendulum = -3;
32770 return cached_can_issue_more;
32772 pos--;
32777 /* We've either finished a vec/vecload group, couldn't find an insn to
32778 continue the current group, or the last insn had nothing to do
32779 with a group. In any case, reset the pendulum. */
32780 vec_load_pendulum = 0;
32783 return cached_can_issue_more;
32786 /* We are about to begin issuing insns for this clock cycle. */
32788 static int
32789 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
32790 rtx_insn **ready ATTRIBUTE_UNUSED,
32791 int *pn_ready ATTRIBUTE_UNUSED,
32792 int clock_var ATTRIBUTE_UNUSED)
32794 int n_ready = *pn_ready;
32796 if (sched_verbose)
32797 fprintf (dump, "// rs6000_sched_reorder :\n");
32799 /* Reorder the ready list, if the second to last ready insn
32800 is a nonpipelined insn. */
32801 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
32803 if (is_nonpipeline_insn (ready[n_ready - 1])
32804 && (recog_memoized (ready[n_ready - 2]) > 0))
32805 /* Simply swap first two insns. */
32806 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
32809 if (rs6000_cpu == PROCESSOR_POWER6)
32810 load_store_pendulum = 0;
32812 return rs6000_issue_rate ();
32815 /* Like rs6000_sched_reorder, but called after issuing each insn. */
32817 static int
32818 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
32819 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
32821 if (sched_verbose)
32822 fprintf (dump, "// rs6000_sched_reorder2 :\n");
32824 /* For Power6, we need to handle some special cases to try to keep the
32825 store queue from overflowing and triggering expensive flushes.
32827 This code monitors how load and store instructions are being issued
32828 and skews the ready list one way or the other to increase the likelihood
32829 that a desired instruction is issued at the proper time.
32831 A couple of things are done. First, we maintain a "load_store_pendulum"
32832 to track the current state of load/store issue.
32834 - If the pendulum is at zero, then no loads or stores have been
32835 issued in the current cycle, so we do nothing.
32837 - If the pendulum is 1, then a single load has been issued in this
32838 cycle and we attempt to locate another load in the ready list to
32839 issue with it.
32841 - If the pendulum is -2, then two stores have already been
32842 issued in this cycle, so we increase the priority of the first load
32843 in the ready list to increase its likelihood of being chosen first
32844 in the next cycle.
32846 - If the pendulum is -1, then a single store has been issued in this
32847 cycle and we attempt to locate another store in the ready list to
32848 issue with it, preferring a store to an adjacent memory location to
32849 facilitate store pairing in the store queue.
32851 - If the pendulum is 2, then two loads have already been
32852 issued in this cycle, so we increase the priority of the first store
32853 in the ready list to increase its likelihood of being chosen first
32854 in the next cycle.
32856 - If the pendulum is < -2 or > 2, then do nothing.
32858 Note: This code covers the most common scenarios. There exist
32859 non-load/store instructions which make use of the LSU and which
32860 would need to be accounted for to strictly model the behavior
32861 of the machine. Those instructions are currently unaccounted
32862 for, to help minimize the compile-time overhead of this code. */
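/* Worked example (a sketch under the rules above): starting from a
   balanced pendulum, issuing a load moves it to 1 and the code below
   tries to pull a second load to the end of the ready list; a second
   load moves it to 2, at which point the first store on the ready
   list gets a priority bump so a store is favored next cycle.  */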
32864 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
32866 int pos;
32867 int i;
32868 rtx_insn *tmp;
32869 rtx load_mem, str_mem;
32871 if (is_store_insn (last_scheduled_insn, &str_mem))
32872 /* Issuing a store, swing the load_store_pendulum to the left */
32873 load_store_pendulum--;
32874 else if (is_load_insn (last_scheduled_insn, &load_mem))
32875 /* Issuing a load, swing the load_store_pendulum to the right */
32876 load_store_pendulum++;
32877 else
32878 return cached_can_issue_more;
32880 /* If the pendulum is balanced, or there is only one instruction on
32881 the ready list, then all is well, so return. */
32882 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
32883 return cached_can_issue_more;
32885 if (load_store_pendulum == 1)
32887 /* A load has been issued in this cycle. Scan the ready list
32888 for another load to issue with it */
32889 pos = *pn_ready-1;
32891 while (pos >= 0)
32893 if (is_load_insn (ready[pos], &load_mem))
32895 /* Found a load. Move it to the head of the ready list,
32896 and adjust its priority so that it is more likely to
32897 stay there */
32898 tmp = ready[pos];
32899 for (i=pos; i<*pn_ready-1; i++)
32900 ready[i] = ready[i + 1];
32901 ready[*pn_ready-1] = tmp;
32903 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32904 INSN_PRIORITY (tmp)++;
32905 break;
32907 pos--;
32910 else if (load_store_pendulum == -2)
32912 /* Two stores have been issued in this cycle. Increase the
32913 priority of the first load in the ready list to favor it for
32914 issuing in the next cycle. */
32915 pos = *pn_ready-1;
32917 while (pos >= 0)
32919 if (is_load_insn (ready[pos], &load_mem)
32920 && !sel_sched_p ()
32921 && INSN_PRIORITY_KNOWN (ready[pos]))
32923 INSN_PRIORITY (ready[pos])++;
32925 /* Adjust the pendulum to account for the fact that a load
32926 was found and increased in priority. This is to prevent
32927 increasing the priority of multiple loads */
32928 load_store_pendulum--;
32930 break;
32932 pos--;
32935 else if (load_store_pendulum == -1)
32937 /* A store has been issued in this cycle. Scan the ready list for
32938 another store to issue with it, preferring a store to an adjacent
32939 memory location */
32940 int first_store_pos = -1;
32942 pos = *pn_ready-1;
32944 while (pos >= 0)
32946 if (is_store_insn (ready[pos], &str_mem))
32948 rtx str_mem2;
32949 /* Maintain the index of the first store found on the
32950 list */
32951 if (first_store_pos == -1)
32952 first_store_pos = pos;
32954 if (is_store_insn (last_scheduled_insn, &str_mem2)
32955 && adjacent_mem_locations (str_mem, str_mem2))
32957 /* Found an adjacent store. Move it to the head of the
32958 ready list, and adjust its priority so that it is
32959 more likely to stay there */
32960 tmp = ready[pos];
32961 for (i=pos; i<*pn_ready-1; i++)
32962 ready[i] = ready[i + 1];
32963 ready[*pn_ready-1] = tmp;
32965 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32966 INSN_PRIORITY (tmp)++;
32968 first_store_pos = -1;
32970 break;
32973 pos--;
32976 if (first_store_pos >= 0)
32978 /* An adjacent store wasn't found, but a non-adjacent store was,
32979 so move the non-adjacent store to the front of the ready
32980 list, and adjust its priority so that it is more likely to
32981 stay there. */
32982 tmp = ready[first_store_pos];
32983 for (i=first_store_pos; i<*pn_ready-1; i++)
32984 ready[i] = ready[i + 1];
32985 ready[*pn_ready-1] = tmp;
32986 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32987 INSN_PRIORITY (tmp)++;
32990 else if (load_store_pendulum == 2)
32992 /* Two loads have been issued in this cycle. Increase the priority
32993 of the first store in the ready list to favor it for issuing in
32994 the next cycle. */
32995 pos = *pn_ready-1;
32997 while (pos >= 0)
32999 if (is_store_insn (ready[pos], &str_mem)
33000 && !sel_sched_p ()
33001 && INSN_PRIORITY_KNOWN (ready[pos]))
33003 INSN_PRIORITY (ready[pos])++;
33005 /* Adjust the pendulum to account for the fact that a store
33006 was found and increased in priority. This is to prevent
33007 increasing the priority of multiple stores */
33008 load_store_pendulum++;
33010 break;
33012 pos--;
33017 /* Do Power9 dependent reordering if necessary. */
33018 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
33019 && recog_memoized (last_scheduled_insn) >= 0)
33020 return power9_sched_reorder2 (ready, *pn_ready - 1);
33022 return cached_can_issue_more;
33025 /* Return whether the presence of INSN causes a dispatch group termination
33026 of group WHICH_GROUP.
33028 If WHICH_GROUP == current_group, this function will return true if INSN
33029 causes the termination of the current group (i.e., the dispatch group to
33030 which INSN belongs). This means that INSN will be the last insn in the
33031 group it belongs to.
33033 If WHICH_GROUP == previous_group, this function will return true if INSN
33034 causes the termination of the previous group (i.e., the dispatch group that
33035 precedes the group to which INSN belongs). This means that INSN will be
33036 the first insn in the group it belongs to. */
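/* For example (a sketch): on POWER6 a sync appears both in
   insn_must_be_first_in_group and insn_must_be_last_in_group below,
   so this predicate returns true for either WHICH_GROUP value and the
   sync effectively dispatches in a group of its own.  */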
33038 static bool
33039 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
33041 bool first, last;
33043 if (! insn)
33044 return false;
33046 first = insn_must_be_first_in_group (insn);
33047 last = insn_must_be_last_in_group (insn);
33049 if (first && last)
33050 return true;
33052 if (which_group == current_group)
33053 return last;
33054 else if (which_group == previous_group)
33055 return first;
33057 return false;
33061 static bool
33062 insn_must_be_first_in_group (rtx_insn *insn)
33064 enum attr_type type;
33066 if (!insn
33067 || NOTE_P (insn)
33068 || DEBUG_INSN_P (insn)
33069 || GET_CODE (PATTERN (insn)) == USE
33070 || GET_CODE (PATTERN (insn)) == CLOBBER)
33071 return false;
33073 switch (rs6000_cpu)
33075 case PROCESSOR_POWER5:
33076 if (is_cracked_insn (insn))
33077 return true;
33078 /* FALLTHRU */
33079 case PROCESSOR_POWER4:
33080 if (is_microcoded_insn (insn))
33081 return true;
33083 if (!rs6000_sched_groups)
33084 return false;
33086 type = get_attr_type (insn);
33088 switch (type)
33090 case TYPE_MFCR:
33091 case TYPE_MFCRF:
33092 case TYPE_MTCR:
33093 case TYPE_DELAYED_CR:
33094 case TYPE_CR_LOGICAL:
33095 case TYPE_MTJMPR:
33096 case TYPE_MFJMPR:
33097 case TYPE_DIV:
33098 case TYPE_LOAD_L:
33099 case TYPE_STORE_C:
33100 case TYPE_ISYNC:
33101 case TYPE_SYNC:
33102 return true;
33103 default:
33104 break;
33106 break;
33107 case PROCESSOR_POWER6:
33108 type = get_attr_type (insn);
33110 switch (type)
33112 case TYPE_EXTS:
33113 case TYPE_CNTLZ:
33114 case TYPE_TRAP:
33115 case TYPE_MUL:
33116 case TYPE_INSERT:
33117 case TYPE_FPCOMPARE:
33118 case TYPE_MFCR:
33119 case TYPE_MTCR:
33120 case TYPE_MFJMPR:
33121 case TYPE_MTJMPR:
33122 case TYPE_ISYNC:
33123 case TYPE_SYNC:
33124 case TYPE_LOAD_L:
33125 case TYPE_STORE_C:
33126 return true;
33127 case TYPE_SHIFT:
33128 if (get_attr_dot (insn) == DOT_NO
33129 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33130 return true;
33131 else
33132 break;
33133 case TYPE_DIV:
33134 if (get_attr_size (insn) == SIZE_32)
33135 return true;
33136 else
33137 break;
33138 case TYPE_LOAD:
33139 case TYPE_STORE:
33140 case TYPE_FPLOAD:
33141 case TYPE_FPSTORE:
33142 if (get_attr_update (insn) == UPDATE_YES)
33143 return true;
33144 else
33145 break;
33146 default:
33147 break;
33149 break;
33150 case PROCESSOR_POWER7:
33151 type = get_attr_type (insn);
33153 switch (type)
33155 case TYPE_CR_LOGICAL:
33156 case TYPE_MFCR:
33157 case TYPE_MFCRF:
33158 case TYPE_MTCR:
33159 case TYPE_DIV:
33160 case TYPE_ISYNC:
33161 case TYPE_LOAD_L:
33162 case TYPE_STORE_C:
33163 case TYPE_MFJMPR:
33164 case TYPE_MTJMPR:
33165 return true;
33166 case TYPE_MUL:
33167 case TYPE_SHIFT:
33168 case TYPE_EXTS:
33169 if (get_attr_dot (insn) == DOT_YES)
33170 return true;
33171 else
33172 break;
33173 case TYPE_LOAD:
33174 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33175 || get_attr_update (insn) == UPDATE_YES)
33176 return true;
33177 else
33178 break;
33179 case TYPE_STORE:
33180 case TYPE_FPLOAD:
33181 case TYPE_FPSTORE:
33182 if (get_attr_update (insn) == UPDATE_YES)
33183 return true;
33184 else
33185 break;
33186 default:
33187 break;
33189 break;
33190 case PROCESSOR_POWER8:
33191 type = get_attr_type (insn);
33193 switch (type)
33195 case TYPE_CR_LOGICAL:
33196 case TYPE_DELAYED_CR:
33197 case TYPE_MFCR:
33198 case TYPE_MFCRF:
33199 case TYPE_MTCR:
33200 case TYPE_SYNC:
33201 case TYPE_ISYNC:
33202 case TYPE_LOAD_L:
33203 case TYPE_STORE_C:
33204 case TYPE_VECSTORE:
33205 case TYPE_MFJMPR:
33206 case TYPE_MTJMPR:
33207 return true;
33208 case TYPE_SHIFT:
33209 case TYPE_EXTS:
33210 case TYPE_MUL:
33211 if (get_attr_dot (insn) == DOT_YES)
33212 return true;
33213 else
33214 break;
33215 case TYPE_LOAD:
33216 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33217 || get_attr_update (insn) == UPDATE_YES)
33218 return true;
33219 else
33220 break;
33221 case TYPE_STORE:
33222 if (get_attr_update (insn) == UPDATE_YES
33223 && get_attr_indexed (insn) == INDEXED_YES)
33224 return true;
33225 else
33226 break;
33227 default:
33228 break;
33230 break;
33231 default:
33232 break;
33235 return false;
33238 static bool
33239 insn_must_be_last_in_group (rtx_insn *insn)
33241 enum attr_type type;
33243 if (!insn
33244 || NOTE_P (insn)
33245 || DEBUG_INSN_P (insn)
33246 || GET_CODE (PATTERN (insn)) == USE
33247 || GET_CODE (PATTERN (insn)) == CLOBBER)
33248 return false;
33250 switch (rs6000_cpu) {
33251 case PROCESSOR_POWER4:
33252 case PROCESSOR_POWER5:
33253 if (is_microcoded_insn (insn))
33254 return true;
33256 if (is_branch_slot_insn (insn))
33257 return true;
33259 break;
33260 case PROCESSOR_POWER6:
33261 type = get_attr_type (insn);
33263 switch (type)
33265 case TYPE_EXTS:
33266 case TYPE_CNTLZ:
33267 case TYPE_TRAP:
33268 case TYPE_MUL:
33269 case TYPE_FPCOMPARE:
33270 case TYPE_MFCR:
33271 case TYPE_MTCR:
33272 case TYPE_MFJMPR:
33273 case TYPE_MTJMPR:
33274 case TYPE_ISYNC:
33275 case TYPE_SYNC:
33276 case TYPE_LOAD_L:
33277 case TYPE_STORE_C:
33278 return true;
33279 case TYPE_SHIFT:
33280 if (get_attr_dot (insn) == DOT_NO
33281 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33282 return true;
33283 else
33284 break;
33285 case TYPE_DIV:
33286 if (get_attr_size (insn) == SIZE_32)
33287 return true;
33288 else
33289 break;
33290 default:
33291 break;
33293 break;
33294 case PROCESSOR_POWER7:
33295 type = get_attr_type (insn);
33297 switch (type)
33299 case TYPE_ISYNC:
33300 case TYPE_SYNC:
33301 case TYPE_LOAD_L:
33302 case TYPE_STORE_C:
33303 return true;
33304 case TYPE_LOAD:
33305 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33306 && get_attr_update (insn) == UPDATE_YES)
33307 return true;
33308 else
33309 break;
33310 case TYPE_STORE:
33311 if (get_attr_update (insn) == UPDATE_YES
33312 && get_attr_indexed (insn) == INDEXED_YES)
33313 return true;
33314 else
33315 break;
33316 default:
33317 break;
33319 break;
33320 case PROCESSOR_POWER8:
33321 type = get_attr_type (insn);
33323 switch (type)
33325 case TYPE_MFCR:
33326 case TYPE_MTCR:
33327 case TYPE_ISYNC:
33328 case TYPE_SYNC:
33329 case TYPE_LOAD_L:
33330 case TYPE_STORE_C:
33331 return true;
33332 case TYPE_LOAD:
33333 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33334 && get_attr_update (insn) == UPDATE_YES)
33335 return true;
33336 else
33337 break;
33338 case TYPE_STORE:
33339 if (get_attr_update (insn) == UPDATE_YES
33340 && get_attr_indexed (insn) == INDEXED_YES)
33341 return true;
33342 else
33343 break;
33344 default:
33345 break;
33347 break;
33348 default:
33349 break;
33352 return false;
33355 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
33356 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
33358 static bool
33359 is_costly_group (rtx *group_insns, rtx next_insn)
33361 int i;
33362 int issue_rate = rs6000_issue_rate ();
33364 for (i = 0; i < issue_rate; i++)
33366 sd_iterator_def sd_it;
33367 dep_t dep;
33368 rtx insn = group_insns[i];
33370 if (!insn)
33371 continue;
33373 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
33375 rtx next = DEP_CON (dep);
33377 if (next == next_insn
33378 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
33379 return true;
33383 return false;
33386 /* Utility of the function redefine_groups.
33387 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
33388 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
33389 to keep it "far" (in a separate group) from GROUP_INSNS, following
33390 one of the following schemes, depending on the value of the flag
33391 -minsert-sched-nops = X:
33392 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
33393 in order to force NEXT_INSN into a separate group.
33394 (2) X < sched_finish_regroup_exact: insert exactly X nops.
33395 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
33396 insertion (has a group just ended, how many vacant issue slots remain in the
33397 last group, and how many dispatch groups were encountered so far). */
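/* Worked example (a sketch, assuming issue_rate == 5): with two insns
   already in the current group, sched_finish_regroup_exact emits two
   nops (three when NEXT_INSN is a branch, since the last nop then
   opens the group the branch is forced into), while
   -minsert-sched-nops=2 emits exactly two nops regardless.  */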
33399 static int
33400 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
33401 rtx_insn *next_insn, bool *group_end, int can_issue_more,
33402 int *group_count)
33404 rtx nop;
33405 bool force;
33406 int issue_rate = rs6000_issue_rate ();
33407 bool end = *group_end;
33408 int i;
33410 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
33411 return can_issue_more;
33413 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
33414 return can_issue_more;
33416 force = is_costly_group (group_insns, next_insn);
33417 if (!force)
33418 return can_issue_more;
33420 if (sched_verbose > 6)
33421 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
33422 *group_count ,can_issue_more);
33424 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
33426 if (*group_end)
33427 can_issue_more = 0;
33429 /* Since only a branch can be issued in the last issue_slot, it is
33430 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
33431 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
33432 in this case the last nop will start a new group and the branch
33433 will be forced to the new group. */
33434 if (can_issue_more && !is_branch_slot_insn (next_insn))
33435 can_issue_more--;
33437 /* Do we have a special group ending nop? */
33438 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
33439 || rs6000_cpu_attr == CPU_POWER8)
33441 nop = gen_group_ending_nop ();
33442 emit_insn_before (nop, next_insn);
33443 can_issue_more = 0;
33445 else
33446 while (can_issue_more > 0)
33448 nop = gen_nop ();
33449 emit_insn_before (nop, next_insn);
33450 can_issue_more--;
33453 *group_end = true;
33454 return 0;
33457 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
33459 int n_nops = rs6000_sched_insert_nops;
33461 /* Nops can't be issued from the branch slot, so the effective
33462 issue_rate for nops is 'issue_rate - 1'. */
33463 if (can_issue_more == 0)
33464 can_issue_more = issue_rate;
33465 can_issue_more--;
33466 if (can_issue_more == 0)
33468 can_issue_more = issue_rate - 1;
33469 (*group_count)++;
33470 end = true;
33471 for (i = 0; i < issue_rate; i++)
33473 group_insns[i] = 0;
33477 while (n_nops > 0)
33479 nop = gen_nop ();
33480 emit_insn_before (nop, next_insn);
33481 if (can_issue_more == issue_rate - 1) /* new group begins */
33482 end = false;
33483 can_issue_more--;
33484 if (can_issue_more == 0)
33486 can_issue_more = issue_rate - 1;
33487 (*group_count)++;
33488 end = true;
33489 for (i = 0; i < issue_rate; i++)
33491 group_insns[i] = 0;
33494 n_nops--;
33497 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
33498 can_issue_more++;
33500 /* Is next_insn going to start a new group? */
33501 *group_end
33502 = (end
33503 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33504 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33505 || (can_issue_more < issue_rate &&
33506 insn_terminates_group_p (next_insn, previous_group)));
33507 if (*group_end && end)
33508 (*group_count)--;
33510 if (sched_verbose > 6)
33511 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
33512 *group_count, can_issue_more);
33513 return can_issue_more;
33516 return can_issue_more;
33519 /* This function tries to synch the dispatch groups that the compiler "sees"
33520 with the dispatch groups that the processor dispatcher is expected to
33521 form in practice. It tries to achieve this synchronization by forcing the
33522 estimated processor grouping on the compiler (as opposed to the function
33523 'pad_groups' which tries to force the scheduler's grouping on the processor).
33525 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
33526 examines the (estimated) dispatch groups that will be formed by the processor
33527 dispatcher. It marks these group boundaries to reflect the estimated
33528 processor grouping, overriding the grouping that the scheduler had marked.
33529 Depending on the value of the flag '-minsert-sched-nops' this function can
33530 force certain insns into separate groups or force a certain distance between
33531 them by inserting nops, for example, if there exists a "costly dependence"
33532 between the insns.
33534 The function estimates the group boundaries that the processor will form as
33535 follows: It keeps track of how many vacant issue slots are available after
33536 each insn. A subsequent insn will start a new group if one of the following
33537 4 cases applies:
33538 - no more vacant issue slots remain in the current dispatch group.
33539 - only the last issue slot, which is the branch slot, is vacant, but the next
33540 insn is not a branch.
33541 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
33542 which means that a cracked insn (which occupies two issue slots) can't be
33543 issued in this group.
33544 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
33545 start a new group. */
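/* For instance (a sketch, assuming issue_rate == 5): after four
   ordinary insns only the branch slot is vacant, so a following add
   starts a new group (the second case above); a cracked insn starts a
   new group once only two slots remain, because one of them is the
   branch slot and it needs two ordinary slots (the third case).  */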
33547 static int
33548 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33549 rtx_insn *tail)
33551 rtx_insn *insn, *next_insn;
33552 int issue_rate;
33553 int can_issue_more;
33554 int slot, i;
33555 bool group_end;
33556 int group_count = 0;
33557 rtx *group_insns;
33559 /* Initialize. */
33560 issue_rate = rs6000_issue_rate ();
33561 group_insns = XALLOCAVEC (rtx, issue_rate);
33562 for (i = 0; i < issue_rate; i++)
33564 group_insns[i] = 0;
33566 can_issue_more = issue_rate;
33567 slot = 0;
33568 insn = get_next_active_insn (prev_head_insn, tail);
33569 group_end = false;
33571 while (insn != NULL_RTX)
33573 slot = (issue_rate - can_issue_more);
33574 group_insns[slot] = insn;
33575 can_issue_more =
33576 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33577 if (insn_terminates_group_p (insn, current_group))
33578 can_issue_more = 0;
33580 next_insn = get_next_active_insn (insn, tail);
33581 if (next_insn == NULL_RTX)
33582 return group_count + 1;
33584 /* Is next_insn going to start a new group? */
33585 group_end
33586 = (can_issue_more == 0
33587 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33588 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33589 || (can_issue_more < issue_rate &&
33590 insn_terminates_group_p (next_insn, previous_group)));
33592 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
33593 next_insn, &group_end, can_issue_more,
33594 &group_count);
33596 if (group_end)
33598 group_count++;
33599 can_issue_more = 0;
33600 for (i = 0; i < issue_rate; i++)
33602 group_insns[i] = 0;
33606 if (GET_MODE (next_insn) == TImode && can_issue_more)
33607 PUT_MODE (next_insn, VOIDmode);
33608 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
33609 PUT_MODE (next_insn, TImode);
33611 insn = next_insn;
33612 if (can_issue_more == 0)
33613 can_issue_more = issue_rate;
33614 } /* while */
33616 return group_count;
33619 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
33620 dispatch group boundaries that the scheduler had marked. Pad with nops
33621 any dispatch groups which have vacant issue slots, in order to force the
33622 scheduler's grouping on the processor dispatcher. The function
33623 returns the number of dispatch groups found. */
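/* E.g. (a sketch, assuming issue_rate == 5): if the scheduler closed
   a group after three insns, the loop below emits one nop when
   next_insn is not a branch (the branch slot is left alone) and two
   when it is, so the dispatcher sees the boundary exactly where the
   scheduler put it.  */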
33625 static int
33626 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33627 rtx_insn *tail)
33629 rtx_insn *insn, *next_insn;
33630 rtx nop;
33631 int issue_rate;
33632 int can_issue_more;
33633 int group_end;
33634 int group_count = 0;
33636 /* Initialize issue_rate. */
33637 issue_rate = rs6000_issue_rate ();
33638 can_issue_more = issue_rate;
33640 insn = get_next_active_insn (prev_head_insn, tail);
33641 next_insn = get_next_active_insn (insn, tail);
33643 while (insn != NULL_RTX)
33645 can_issue_more =
33646 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33648 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
33650 if (next_insn == NULL_RTX)
33651 break;
33653 if (group_end)
33655 /* If the scheduler had marked group termination at this location
33656 (between insn and next_insn), and neither insn nor next_insn will
33657 force group termination, pad the group with nops to force group
33658 termination. */
33659 if (can_issue_more
33660 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
33661 && !insn_terminates_group_p (insn, current_group)
33662 && !insn_terminates_group_p (next_insn, previous_group))
33664 if (!is_branch_slot_insn (next_insn))
33665 can_issue_more--;
33667 while (can_issue_more)
33669 nop = gen_nop ();
33670 emit_insn_before (nop, next_insn);
33671 can_issue_more--;
33675 can_issue_more = issue_rate;
33676 group_count++;
33679 insn = next_insn;
33680 next_insn = get_next_active_insn (insn, tail);
33683 return group_count;
33686 /* We're beginning a new block. Initialize data structures as necessary. */
33688 static void
33689 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
33690 int sched_verbose ATTRIBUTE_UNUSED,
33691 int max_ready ATTRIBUTE_UNUSED)
33693 last_scheduled_insn = NULL;
33694 load_store_pendulum = 0;
33695 divide_cnt = 0;
33696 vec_load_pendulum = 0;
33699 /* The following function is called at the end of scheduling BB.
33700 After reload, it inserts nops to enforce insn group bundling. */
33702 static void
33703 rs6000_sched_finish (FILE *dump, int sched_verbose)
33705 int n_groups;
33707 if (sched_verbose)
33708 fprintf (dump, "=== Finishing schedule.\n");
33710 if (reload_completed && rs6000_sched_groups)
33712 /* Do not run sched_finish hook when selective scheduling enabled. */
33713 if (sel_sched_p ())
33714 return;
33716 if (rs6000_sched_insert_nops == sched_finish_none)
33717 return;
33719 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
33720 n_groups = pad_groups (dump, sched_verbose,
33721 current_sched_info->prev_head,
33722 current_sched_info->next_tail);
33723 else
33724 n_groups = redefine_groups (dump, sched_verbose,
33725 current_sched_info->prev_head,
33726 current_sched_info->next_tail);
33728 if (sched_verbose >= 6)
33730 fprintf (dump, "ngroups = %d\n", n_groups);
33731 print_rtl (dump, current_sched_info->prev_head);
33732 fprintf (dump, "Done finish_sched\n");
33737 struct rs6000_sched_context
33739 short cached_can_issue_more;
33740 rtx_insn *last_scheduled_insn;
33741 int load_store_pendulum;
33742 int divide_cnt;
33743 int vec_load_pendulum;
33746 typedef struct rs6000_sched_context rs6000_sched_context_def;
33747 typedef rs6000_sched_context_def *rs6000_sched_context_t;
33749 /* Allocate storage for a new scheduling context. */
33750 static void *
33751 rs6000_alloc_sched_context (void)
33753 return xmalloc (sizeof (rs6000_sched_context_def));
33756 /* If CLEAN_P is true, initialize _SC with clean data;
33757 otherwise, initialize it from the global context. */
33758 static void
33759 rs6000_init_sched_context (void *_sc, bool clean_p)
33761 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33763 if (clean_p)
33765 sc->cached_can_issue_more = 0;
33766 sc->last_scheduled_insn = NULL;
33767 sc->load_store_pendulum = 0;
33768 sc->divide_cnt = 0;
33769 sc->vec_load_pendulum = 0;
33771 else
33773 sc->cached_can_issue_more = cached_can_issue_more;
33774 sc->last_scheduled_insn = last_scheduled_insn;
33775 sc->load_store_pendulum = load_store_pendulum;
33776 sc->divide_cnt = divide_cnt;
33777 sc->vec_load_pendulum = vec_load_pendulum;
33781 /* Sets the global scheduling context to the one pointed to by _SC. */
33782 static void
33783 rs6000_set_sched_context (void *_sc)
33785 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33787 gcc_assert (sc != NULL);
33789 cached_can_issue_more = sc->cached_can_issue_more;
33790 last_scheduled_insn = sc->last_scheduled_insn;
33791 load_store_pendulum = sc->load_store_pendulum;
33792 divide_cnt = sc->divide_cnt;
33793 vec_load_pendulum = sc->vec_load_pendulum;
33796 /* Free _SC. */
33797 static void
33798 rs6000_free_sched_context (void *_sc)
33800 gcc_assert (_sc != NULL);
33802 free (_sc);
33806 /* Length in bytes of the trampoline for entering a nested function. */
33808 int
33809 rs6000_trampoline_size (void)
33811 int ret = 0;
33813 switch (DEFAULT_ABI)
33815 default:
33816 gcc_unreachable ();
33818 case ABI_AIX:
33819 ret = (TARGET_32BIT) ? 12 : 24;
33820 break;
33822 case ABI_ELFv2:
33823 gcc_assert (!TARGET_32BIT);
33824 ret = 32;
33825 break;
33827 case ABI_DARWIN:
33828 case ABI_V4:
33829 ret = (TARGET_32BIT) ? 40 : 48;
33830 break;
33833 return ret;
33836 /* Emit RTL insns to initialize the variable parts of a trampoline.
33837 FNADDR is an RTX for the address of the function's pure code.
33838 CXT is an RTX for the static chain value for the function. */
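/* Layout sketch for the AIX case below (regsize is 4 or 8 bytes per
   TARGET_32BIT, matching the 12/24-byte sizes returned above):
       tramp + 0*regsize: entry address, copied from FNADDR's descriptor
       tramp + 1*regsize: TOC pointer, copied from FNADDR's descriptor
       tramp + 2*regsize: static chain (CXT)  */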
33840 static void
33841 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
33843 int regsize = (TARGET_32BIT) ? 4 : 8;
33844 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
33845 rtx ctx_reg = force_reg (Pmode, cxt);
33846 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
33848 switch (DEFAULT_ABI)
33850 default:
33851 gcc_unreachable ();
33853 /* Under AIX, just build the 3-word function descriptor. */
33854 case ABI_AIX:
33856 rtx fnmem, fn_reg, toc_reg;
33858 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33859 error ("You cannot take the address of a nested function if you use "
33860 "the -mno-pointers-to-nested-functions option.");
33862 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
33863 fn_reg = gen_reg_rtx (Pmode);
33864 toc_reg = gen_reg_rtx (Pmode);
33866 /* Macro to shorten the code expansions below. */
33867 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
33869 m_tramp = replace_equiv_address (m_tramp, addr);
33871 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
33872 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
33873 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
33874 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
33875 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
33877 # undef MEM_PLUS
33879 break;
33881 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
33882 case ABI_ELFv2:
33883 case ABI_DARWIN:
33884 case ABI_V4:
33885 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
33886 LCT_NORMAL, VOIDmode, 4,
33887 addr, Pmode,
33888 GEN_INT (rs6000_trampoline_size ()), SImode,
33889 fnaddr, Pmode,
33890 ctx_reg, Pmode);
33891 break;
33896 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
33897 identifier as an argument, so the front end shouldn't look it up. */
33899 static bool
33900 rs6000_attribute_takes_identifier_p (const_tree attr_id)
33902 return is_attribute_p ("altivec", attr_id);
33905 /* Handle the "altivec" attribute. The attribute may have
33906 arguments as follows:
33908 __attribute__((altivec(vector__)))
33909 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
33910 __attribute__((altivec(bool__))) (always followed by 'unsigned')
33912 and may appear more than once (e.g., 'vector bool char') in a
33913 given declaration. */
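/* Usage sketch (illustrative declarations; the <altivec.h> keywords
   expand to these attribute forms):
       __attribute__((altivec(vector__))) unsigned int vi;
       __attribute__((altivec(bool__)))   unsigned int bi;
       __attribute__((altivec(pixel__)))  unsigned short px;
   yield unsigned_V4SI_type_node, bool_V4SI_type_node and
   pixel_V8HI_type_node respectively via the mode switches below.  */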
33915 static tree
33916 rs6000_handle_altivec_attribute (tree *node,
33917 tree name ATTRIBUTE_UNUSED,
33918 tree args,
33919 int flags ATTRIBUTE_UNUSED,
33920 bool *no_add_attrs)
33922 tree type = *node, result = NULL_TREE;
33923 machine_mode mode;
33924 int unsigned_p;
33925 char altivec_type
33926 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
33927 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
33928 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
33929 : '?');
33931 while (POINTER_TYPE_P (type)
33932 || TREE_CODE (type) == FUNCTION_TYPE
33933 || TREE_CODE (type) == METHOD_TYPE
33934 || TREE_CODE (type) == ARRAY_TYPE)
33935 type = TREE_TYPE (type);
33937 mode = TYPE_MODE (type);
33939 /* Check for invalid AltiVec type qualifiers. */
33940 if (type == long_double_type_node)
33941 error ("use of %<long double%> in AltiVec types is invalid");
33942 else if (type == boolean_type_node)
33943 error ("use of boolean types in AltiVec types is invalid");
33944 else if (TREE_CODE (type) == COMPLEX_TYPE)
33945 error ("use of %<complex%> in AltiVec types is invalid");
33946 else if (DECIMAL_FLOAT_MODE_P (mode))
33947 error ("use of decimal floating point types in AltiVec types is invalid");
33948 else if (!TARGET_VSX)
33950 if (type == long_unsigned_type_node || type == long_integer_type_node)
33952 if (TARGET_64BIT)
33953 error ("use of %<long%> in AltiVec types is invalid for "
33954 "64-bit code without -mvsx");
33955 else if (rs6000_warn_altivec_long)
33956 warning (0, "use of %<long%> in AltiVec types is deprecated; "
33957 "use %<int%>");
33959 else if (type == long_long_unsigned_type_node
33960 || type == long_long_integer_type_node)
33961 error ("use of %<long long%> in AltiVec types is invalid without "
33962 "-mvsx");
33963 else if (type == double_type_node)
33964 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
33967 switch (altivec_type)
33969 case 'v':
33970 unsigned_p = TYPE_UNSIGNED (type);
33971 switch (mode)
33973 case TImode:
33974 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
33975 break;
33976 case DImode:
33977 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
33978 break;
33979 case SImode:
33980 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
33981 break;
33982 case HImode:
33983 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
33984 break;
33985 case QImode:
33986 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
33987 break;
33988 case SFmode: result = V4SF_type_node; break;
33989 case DFmode: result = V2DF_type_node; break;
33990 /* If the user says 'vector int bool', we may be handed the 'bool'
33991 attribute _before_ the 'vector' attribute, and so select the
33992 proper type in the 'b' case below. */
33993 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
33994 case V2DImode: case V2DFmode:
33995 result = type;
33996 default: break;
33998 break;
33999 case 'b':
34000 switch (mode)
34002 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
34003 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
34004 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
34005 case QImode: case V16QImode: result = bool_V16QI_type_node;
34006 default: break;
34008 break;
34009 case 'p':
34010 switch (mode)
34012 case V8HImode: result = pixel_V8HI_type_node;
34013 default: break;
34015 default: break;
34018 /* Propagate qualifiers attached to the element type
34019 onto the vector type. */
34020 if (result && result != type && TYPE_QUALS (type))
34021 result = build_qualified_type (result, TYPE_QUALS (type));
34023 *no_add_attrs = true; /* No need to hang on to the attribute. */
34025 if (result)
34026 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
34028 return NULL_TREE;
34031 /* AltiVec defines four built-in scalar types that serve as vector
34032 elements; we must teach the compiler how to mangle them. */
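/* E.g. (a sketch): the element type of "vector bool int" mangles as
   "U6__booli" below rather than as a plain int, so C++ overloads on
   "vector bool int" vs. "vector signed int" remain distinct.  */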
34034 static const char *
34035 rs6000_mangle_type (const_tree type)
34037 type = TYPE_MAIN_VARIANT (type);
34039 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34040 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34041 return NULL;
34043 if (type == bool_char_type_node) return "U6__boolc";
34044 if (type == bool_short_type_node) return "U6__bools";
34045 if (type == pixel_type_node) return "u7__pixel";
34046 if (type == bool_int_type_node) return "U6__booli";
34047 if (type == bool_long_type_node) return "U6__booll";
34049 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
34050 "g" for IBM extended double, no matter whether it is long double (using
34051 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
34052 if (TARGET_FLOAT128_TYPE)
34054 if (type == ieee128_float_type_node)
34055 return "U10__float128";
34057 if (type == ibm128_float_type_node)
34058 return "g";
34060 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
34061 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
34064 /* Mangle IBM extended float long double as `g' (__float128) on
34065 powerpc*-linux where long-double-64 previously was the default. */
34066 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
34067 && TARGET_ELF
34068 && TARGET_LONG_DOUBLE_128
34069 && !TARGET_IEEEQUAD)
34070 return "g";
34072 /* For all other types, use normal C++ mangling. */
34073 return NULL;
34076 /* Handle a "longcall" or "shortcall" attribute; arguments as in
34077 struct attribute_spec.handler. */
34079 static tree
34080 rs6000_handle_longcall_attribute (tree *node, tree name,
34081 tree args ATTRIBUTE_UNUSED,
34082 int flags ATTRIBUTE_UNUSED,
34083 bool *no_add_attrs)
34085 if (TREE_CODE (*node) != FUNCTION_TYPE
34086 && TREE_CODE (*node) != FIELD_DECL
34087 && TREE_CODE (*node) != TYPE_DECL)
34089 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34090 name);
34091 *no_add_attrs = true;
34094 return NULL_TREE;
34097 /* Set longcall attributes on all functions declared when
34098 rs6000_default_long_calls is true. */
34099 static void
34100 rs6000_set_default_type_attributes (tree type)
34102 if (rs6000_default_long_calls
34103 && (TREE_CODE (type) == FUNCTION_TYPE
34104 || TREE_CODE (type) == METHOD_TYPE))
34105 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
34106 NULL_TREE,
34107 TYPE_ATTRIBUTES (type));
34109 #if TARGET_MACHO
34110 darwin_set_default_type_attributes (type);
34111 #endif
34114 /* Return a reference suitable for calling a function with the
34115 longcall attribute. */
34117 rtx
34118 rs6000_longcall_ref (rtx call_ref)
34120 const char *call_name;
34121 tree node;
34123 if (GET_CODE (call_ref) != SYMBOL_REF)
34124 return call_ref;
34126 /* System V adds '.' to the internal name, so strip any leading dots. */
34127 call_name = XSTR (call_ref, 0);
34128 if (*call_name == '.')
34130 while (*call_name == '.')
34131 call_name++;
34133 node = get_identifier (call_name);
34134 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
34137 return force_reg (Pmode, call_ref);
34140 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
34141 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
34142 #endif
34144 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34145 struct attribute_spec.handler. */
34146 static tree
34147 rs6000_handle_struct_attribute (tree *node, tree name,
34148 tree args ATTRIBUTE_UNUSED,
34149 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34151 tree *type = NULL;
34152 if (DECL_P (*node))
34154 if (TREE_CODE (*node) == TYPE_DECL)
34155 type = &TREE_TYPE (*node);
34157 else
34158 type = node;
34160 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
34161 || TREE_CODE (*type) == UNION_TYPE)))
34163 warning (OPT_Wattributes, "%qE attribute ignored", name);
34164 *no_add_attrs = true;
34167 else if ((is_attribute_p ("ms_struct", name)
34168 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
34169 || ((is_attribute_p ("gcc_struct", name)
34170 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
34172 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
34173 name);
34174 *no_add_attrs = true;
34177 return NULL_TREE;
34180 static bool
34181 rs6000_ms_bitfield_layout_p (const_tree record_type)
34183 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
34184 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
34185 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
34188 #ifdef USING_ELFOS_H
34190 /* A get_unnamed_section callback, used for switching to toc_section. */
34192 static void
34193 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34195 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34196 && TARGET_MINIMAL_TOC)
34198 if (!toc_initialized)
34200 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34201 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34202 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
34203 fprintf (asm_out_file, "\t.tc ");
34204 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
34205 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34206 fprintf (asm_out_file, "\n");
34208 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34209 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34210 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34211 fprintf (asm_out_file, " = .+32768\n");
34212 toc_initialized = 1;
34214 else
34215 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34217 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34219 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34220 if (!toc_initialized)
34222 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34223 toc_initialized = 1;
34226 else
34228 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34229 if (!toc_initialized)
34231 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34232 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34233 fprintf (asm_out_file, " = .+32768\n");
34234 toc_initialized = 1;
34239 /* Implement TARGET_ASM_INIT_SECTIONS. */
34241 static void
34242 rs6000_elf_asm_init_sections (void)
34244 toc_section
34245 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
34247 sdata2_section
34248 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
34249 SDATA2_SECTION_ASM_OP);
34252 /* Implement TARGET_SELECT_RTX_SECTION. */
34254 static section *
34255 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
34256 unsigned HOST_WIDE_INT align)
34258 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34259 return toc_section;
34260 else
34261 return default_elf_select_rtx_section (mode, x, align);
34264 /* For a SYMBOL_REF, set generic flags and then perform some
34265 target-specific processing.
34267 When the AIX ABI is requested on a non-AIX system, replace the
34268 function name with the real name (with a leading .) rather than the
34269 function descriptor name. This saves a lot of overriding code to
34270 read the prefixes. */
34272 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
34273 static void
34274 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
34276 default_encode_section_info (decl, rtl, first);
34278 if (first
34279 && TREE_CODE (decl) == FUNCTION_DECL
34280 && !TARGET_AIX
34281 && DEFAULT_ABI == ABI_AIX)
34283 rtx sym_ref = XEXP (rtl, 0);
34284 size_t len = strlen (XSTR (sym_ref, 0));
34285 char *str = XALLOCAVEC (char, len + 2);
34286 str[0] = '.';
34287 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
34288 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
34292 static inline bool
34293 compare_section_name (const char *section, const char *templ)
34295 int len;
34297 len = strlen (templ);
34298 return (strncmp (section, templ, len) == 0
34299 && (section[len] == 0 || section[len] == '.'));
34302 bool
34303 rs6000_elf_in_small_data_p (const_tree decl)
34305 if (rs6000_sdata == SDATA_NONE)
34306 return false;
34308 /* We want to merge strings, so we never consider them small data. */
34309 if (TREE_CODE (decl) == STRING_CST)
34310 return false;
34312 /* Functions are never in the small data area. */
34313 if (TREE_CODE (decl) == FUNCTION_DECL)
34314 return false;
34316 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
34318 const char *section = DECL_SECTION_NAME (decl);
34319 if (compare_section_name (section, ".sdata")
34320 || compare_section_name (section, ".sdata2")
34321 || compare_section_name (section, ".gnu.linkonce.s")
34322 || compare_section_name (section, ".sbss")
34323 || compare_section_name (section, ".sbss2")
34324 || compare_section_name (section, ".gnu.linkonce.sb")
34325 || strcmp (section, ".PPC.EMB.sdata0") == 0
34326 || strcmp (section, ".PPC.EMB.sbss0") == 0)
34327 return true;
34329 else
34331 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
34333 if (size > 0
34334 && size <= g_switch_value
34335 /* If it's not public, and we're not going to reference it there,
34336 there's no need to put it in the small data section. */
34337 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
34338 return true;
34341 return false;
34344 #endif /* USING_ELFOS_H */
34346 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
34348 static bool
34349 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
34351 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
34354 /* Do not place thread-local symbols refs in the object blocks. */
34356 static bool
34357 rs6000_use_blocks_for_decl_p (const_tree decl)
34359 return !DECL_THREAD_LOCAL_P (decl);
34362 /* Return a REG that occurs in ADDR with coefficient 1.
34363 ADDR can be effectively incremented by incrementing REG.
34365 r0 is special and we must not select it as an address
34366 register by this routine since our caller will try to
34367 increment the returned register via an "la" instruction. */
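/* E.g. (a sketch): for ADDR == (plus (plus (reg r9) (reg r0))
   (const_int 8)) the loop below first drops the constant term and
   then selects r9 rather than r0, since r0 reads as zero in the base
   position of an "la"/"addi".  */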
34369 rtx
34370 find_addr_reg (rtx addr)
34372 while (GET_CODE (addr) == PLUS)
34374 if (GET_CODE (XEXP (addr, 0)) == REG
34375 && REGNO (XEXP (addr, 0)) != 0)
34376 addr = XEXP (addr, 0);
34377 else if (GET_CODE (XEXP (addr, 1)) == REG
34378 && REGNO (XEXP (addr, 1)) != 0)
34379 addr = XEXP (addr, 1);
34380 else if (CONSTANT_P (XEXP (addr, 0)))
34381 addr = XEXP (addr, 1);
34382 else if (CONSTANT_P (XEXP (addr, 1)))
34383 addr = XEXP (addr, 0);
34384 else
34385 gcc_unreachable ();
34387 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
34388 return addr;
34391 void
34392 rs6000_fatal_bad_address (rtx op)
34394 fatal_insn ("bad address", op);
34397 #if TARGET_MACHO
34399 typedef struct branch_island_d {
34400 tree function_name;
34401 tree label_name;
34402 int line_number;
34403 } branch_island;
34406 static vec<branch_island, va_gc> *branch_islands;
34408 /* Remember to generate a branch island for far calls to the given
34409 function. */
34411 static void
34412 add_compiler_branch_island (tree label_name, tree function_name,
34413 int line_number)
34415 branch_island bi = {function_name, label_name, line_number};
34416 vec_safe_push (branch_islands, bi);
34419 /* Generate far-jump branch islands for everything recorded in
34420 branch_islands. Invoked immediately after the last instruction of
34421 the epilogue has been emitted; the branch islands must be appended
34422 to, and contiguous with, the function body. Mach-O stubs are
34423 generated in machopic_output_stub(). */
34425 static void
34426 macho_branch_islands (void)
34428 char tmp_buf[512];
34430 while (!vec_safe_is_empty (branch_islands))
34432 branch_island *bi = &branch_islands->last ();
34433 const char *label = IDENTIFIER_POINTER (bi->label_name);
34434 const char *name = IDENTIFIER_POINTER (bi->function_name);
34435 char name_buf[512];
34436 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
34437 if (name[0] == '*' || name[0] == '&')
34438 strcpy (name_buf, name+1);
34439 else
34441 name_buf[0] = '_';
34442 strcpy (name_buf+1, name);
34444 strcpy (tmp_buf, "\n");
34445 strcat (tmp_buf, label);
34446 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34447 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34448 dbxout_stabd (N_SLINE, bi->line_number);
34449 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34450 if (flag_pic)
34452 if (TARGET_LINK_STACK)
34454 char name[32];
34455 get_ppc476_thunk_name (name);
34456 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
34457 strcat (tmp_buf, name);
34458 strcat (tmp_buf, "\n");
34459 strcat (tmp_buf, label);
34460 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34462 else
34464 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
34465 strcat (tmp_buf, label);
34466 strcat (tmp_buf, "_pic\n");
34467 strcat (tmp_buf, label);
34468 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34471 strcat (tmp_buf, "\taddis r11,r11,ha16(");
34472 strcat (tmp_buf, name_buf);
34473 strcat (tmp_buf, " - ");
34474 strcat (tmp_buf, label);
34475 strcat (tmp_buf, "_pic)\n");
34477 strcat (tmp_buf, "\tmtlr r0\n");
34479 strcat (tmp_buf, "\taddi r12,r11,lo16(");
34480 strcat (tmp_buf, name_buf);
34481 strcat (tmp_buf, " - ");
34482 strcat (tmp_buf, label);
34483 strcat (tmp_buf, "_pic)\n");
34485 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
34487 else
34489 strcat (tmp_buf, ":\nlis r12,hi16(");
34490 strcat (tmp_buf, name_buf);
34491 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
34492 strcat (tmp_buf, name_buf);
34493 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
34495 output_asm_insn (tmp_buf, 0);
34496 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34497 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34498 dbxout_stabd (N_SLINE, bi->line_number);
34499 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34500 branch_islands->pop ();
34504 /* NO_PREVIOUS_DEF checks whether the function name is already in the
34505 branch island list. */
34507 static int
34508 no_previous_def (tree function_name)
34510 branch_island *bi;
34511 unsigned ix;
34513 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34514 if (function_name == bi->function_name)
34515 return 0;
34516 return 1;
34519 /* GET_PREV_LABEL gets the label name from the previous definition of
34520 the function. */
34522 static tree
34523 get_prev_label (tree function_name)
34525 branch_island *bi;
34526 unsigned ix;
34528 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34529 if (function_name == bi->function_name)
34530 return bi->label_name;
34531 return NULL_TREE;
34534 /* INSN is either a function call or a millicode call. It may have an
34535 unconditional jump in its delay slot.
34537 CALL_DEST is the routine we are calling. */
34539 char *
34540 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
34541 int cookie_operand_number)
34543 static char buf[256];
34544 if (darwin_emit_branch_islands
34545 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
34546 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
34548 tree labelname;
34549 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
34551 if (no_previous_def (funname))
34553 rtx label_rtx = gen_label_rtx ();
34554 char *label_buf, temp_buf[256];
34555 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
34556 CODE_LABEL_NUMBER (label_rtx));
34557 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
34558 labelname = get_identifier (label_buf);
34559 add_compiler_branch_island (labelname, funname, insn_line (insn));
34561 else
34562 labelname = get_prev_label (funname);
34564 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
34565 instruction will reach 'foo', otherwise link as 'bl L42'".
34566 "L42" should be a 'branch island', that will do a far jump to
34567 'foo'. Branch islands are generated in
34568 macho_branch_islands(). */
34569 sprintf (buf, "jbsr %%z%d,%.246s",
34570 dest_operand_number, IDENTIFIER_POINTER (labelname));
34572 else
34573 sprintf (buf, "bl %%z%d", dest_operand_number);
34574 return buf;
34577 /* Generate PIC and indirect symbol stubs. */
34579 void
34580 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34582 unsigned int length;
34583 char *symbol_name, *lazy_ptr_name;
34584 char *local_label_0;
34585 static int label = 0;
34587 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34588 symb = (*targetm.strip_name_encoding) (symb);
34591 length = strlen (symb);
34592 symbol_name = XALLOCAVEC (char, length + 32);
34593 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34595 lazy_ptr_name = XALLOCAVEC (char, length + 32);
34596 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
34598 if (flag_pic == 2)
34599 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
34600 else
34601 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
34603 if (flag_pic == 2)
34605 fprintf (file, "\t.align 5\n");
34607 fprintf (file, "%s:\n", stub);
34608 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34610 label++;
34611 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
34612 sprintf (local_label_0, "\"L%011d$spb\"", label);
34614 fprintf (file, "\tmflr r0\n");
34615 if (TARGET_LINK_STACK)
34617 char name[32];
34618 get_ppc476_thunk_name (name);
34619 fprintf (file, "\tbl %s\n", name);
34620 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34622 else
34624 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
34625 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34627 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
34628 lazy_ptr_name, local_label_0);
34629 fprintf (file, "\tmtlr r0\n");
34630 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
34631 (TARGET_64BIT ? "ldu" : "lwzu"),
34632 lazy_ptr_name, local_label_0);
34633 fprintf (file, "\tmtctr r12\n");
34634 fprintf (file, "\tbctr\n");
34636 else
34638 fprintf (file, "\t.align 4\n");
34640 fprintf (file, "%s:\n", stub);
34641 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34643 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
34644 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
34645 (TARGET_64BIT ? "ldu" : "lwzu"),
34646 lazy_ptr_name);
34647 fprintf (file, "\tmtctr r12\n");
34648 fprintf (file, "\tbctr\n");
34651 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
34652 fprintf (file, "%s:\n", lazy_ptr_name);
34653 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34654 fprintf (file, "%sdyld_stub_binding_helper\n",
34655 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
34658 /* Legitimize PIC addresses. If the address is already
34659 position-independent, we return ORIG. Newly generated
34660 position-independent addresses go into a reg. This is REG if
34661 nonzero; otherwise we allocate register(s) as necessary. */
34663 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
34665 rtx
34666 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
34667 rtx reg)
34669 rtx base, offset;
34671 if (reg == NULL && ! reload_in_progress && ! reload_completed)
34672 reg = gen_reg_rtx (Pmode);
34674 if (GET_CODE (orig) == CONST)
34676 rtx reg_temp;
34678 if (GET_CODE (XEXP (orig, 0)) == PLUS
34679 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
34680 return orig;
34682 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
34684 /* Use a different reg for the intermediate value, as
34685 it will be marked UNCHANGING. */
34686 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
34687 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
34688 Pmode, reg_temp);
34689 offset =
34690 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
34691 Pmode, reg);
34693 if (GET_CODE (offset) == CONST_INT)
34695 if (SMALL_INT (offset))
34696 return plus_constant (Pmode, base, INTVAL (offset));
34697 else if (! reload_in_progress && ! reload_completed)
34698 offset = force_reg (Pmode, offset);
34699 else
34701 rtx mem = force_const_mem (Pmode, orig);
34702 return machopic_legitimize_pic_address (mem, Pmode, reg);
34705 return gen_rtx_PLUS (Pmode, base, offset);
34708 /* Fall back on generic machopic code. */
34709 return machopic_legitimize_pic_address (orig, mode, reg);
34712 /* Output a .machine directive for the Darwin assembler, and call
34713 the generic start_file routine. */
34715 static void
34716 rs6000_darwin_file_start (void)
34718 static const struct
34720 const char *arg;
34721 const char *name;
34722 HOST_WIDE_INT if_set;
34723 } mapping[] = {
34724 { "ppc64", "ppc64", MASK_64BIT },
34725 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
34726 { "power4", "ppc970", 0 },
34727 { "G5", "ppc970", 0 },
34728 { "7450", "ppc7450", 0 },
34729 { "7400", "ppc7400", MASK_ALTIVEC },
34730 { "G4", "ppc7400", 0 },
34731 { "750", "ppc750", 0 },
34732 { "740", "ppc750", 0 },
34733 { "G3", "ppc750", 0 },
34734 { "604e", "ppc604e", 0 },
34735 { "604", "ppc604", 0 },
34736 { "603e", "ppc603", 0 },
34737 { "603", "ppc603", 0 },
34738 { "601", "ppc601", 0 },
34739 { NULL, "ppc", 0 } };
34740 const char *cpu_id = "";
34741 size_t i;
34743 rs6000_file_start ();
34744 darwin_file_start ();
34746 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
34748 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
34749 cpu_id = rs6000_default_cpu;
34751 if (global_options_set.x_rs6000_cpu_index)
34752 cpu_id = processor_target_table[rs6000_cpu_index].name;
34754 /* Look through the mapping array. Pick the first name that either
34755 matches the argument, has a bit set in IF_SET that is also set
34756 in the target flags, or has a NULL name. */
34758 i = 0;
34759 while (mapping[i].arg != NULL
34760 && strcmp (mapping[i].arg, cpu_id) != 0
34761 && (mapping[i].if_set & rs6000_isa_flags) == 0)
34762 i++;
34764 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
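/* E.g. (a sketch): -mcpu=G4 matches the "G4" row above and emits
   ".machine ppc7400"; a CPU string with no row of its own but with
   MASK_ALTIVEC set still lands on the "7400" row through its IF_SET
   bits, and with nothing set the NULL sentinel yields ".machine ppc". */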
34767 #endif /* TARGET_MACHO */
34769 #if TARGET_ELF
34770 static int
34771 rs6000_elf_reloc_rw_mask (void)
34773 if (flag_pic)
34774 return 3;
34775 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34776 return 2;
34777 else
34778 return 0;
34781 /* Record an element in the table of global constructors. SYMBOL is
34782 a SYMBOL_REF of the function to be called; PRIORITY is a number
34783 between 0 and MAX_INIT_PRIORITY.
34785 This differs from default_named_section_asm_out_constructor in
34786 that we have special handling for -mrelocatable. */
34788 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
34789 static void
34790 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
34792 const char *section = ".ctors";
34793 char buf[18];
34795 if (priority != DEFAULT_INIT_PRIORITY)
34797 sprintf (buf, ".ctors.%.5u",
34798 /* Invert the numbering so the linker puts us in the proper
34799 order; constructors are run from right to left, and the
34800 linker sorts in increasing order. */
34801 MAX_INIT_PRIORITY - priority);
34802 section = buf;
34805 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34806 assemble_align (POINTER_SIZE);
34808 if (DEFAULT_ABI == ABI_V4
34809 && (TARGET_RELOCATABLE || flag_pic > 1))
34811 fputs ("\t.long (", asm_out_file);
34812 output_addr_const (asm_out_file, symbol);
34813 fputs (")@fixup\n", asm_out_file);
34815 else
34816 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
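/* A worked example of the inversion above (illustration only, assuming the
   usual ELF MAX_INIT_PRIORITY of 65535): priority 1 lands in ".ctors.65534",
   which sorts after ".ctors.00000" (priority 65535), so it sits rightmost;
   since .ctors entries execute right to left, it runs first, matching the
   rule that lower init_priority values construct earlier.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
#define MAX_INIT_PRIORITY 65535
int
main (void)
{
  char buf[18];
  unsigned int priority = 1;
  sprintf (buf, ".ctors.%.5u", MAX_INIT_PRIORITY - priority);
  printf ("%s\n", buf);		/* prints ".ctors.65534" */
  return 0;
}
#endif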
34819 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
34820 static void
34821 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
34823 const char *section = ".dtors";
34824 char buf[18];
34826 if (priority != DEFAULT_INIT_PRIORITY)
34828 sprintf (buf, ".dtors.%.5u",
34829 /* Invert the numbering so the linker puts us in the proper
34830 order; constructors are run from right to left, and the
34831 linker sorts in increasing order. */
34832 MAX_INIT_PRIORITY - priority);
34833 section = buf;
34836 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34837 assemble_align (POINTER_SIZE);
34839 if (DEFAULT_ABI == ABI_V4
34840 && (TARGET_RELOCATABLE || flag_pic > 1))
34842 fputs ("\t.long (", asm_out_file);
34843 output_addr_const (asm_out_file, symbol);
34844 fputs (")@fixup\n", asm_out_file);
34846 else
34847 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34850 void
34851 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
34853 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
34855 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
34856 ASM_OUTPUT_LABEL (file, name);
34857 fputs (DOUBLE_INT_ASM_OP, file);
34858 rs6000_output_function_entry (file, name);
34859 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
34860 if (DOT_SYMBOLS)
34862 fputs ("\t.size\t", file);
34863 assemble_name (file, name);
34864 fputs (",24\n\t.type\t.", file);
34865 assemble_name (file, name);
34866 fputs (",@function\n", file);
34867 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
34869 fputs ("\t.globl\t.", file);
34870 assemble_name (file, name);
34871 putc ('\n', file);
34874 else
34875 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34876 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34877 rs6000_output_function_entry (file, name);
34878 fputs (":\n", file);
34879 return;
34882 if (DEFAULT_ABI == ABI_V4
34883 && (TARGET_RELOCATABLE || flag_pic > 1)
34884 && !TARGET_SECURE_PLT
34885 && (get_pool_size () != 0 || crtl->profile)
34886 && uses_TOC ())
34888 char buf[256];
34890 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34892 fprintf (file, "\t.long ");
34893 assemble_name (file, toc_label_name);
34894 need_toc_init = 1;
34895 putc ('-', file);
34896 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34897 assemble_name (file, buf);
34898 putc ('\n', file);
34901 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34902 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34904 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
34906 char buf[256];
34908 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34910 fprintf (file, "\t.quad .TOC.-");
34911 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34912 assemble_name (file, buf);
34913 putc ('\n', file);
34916 if (DEFAULT_ABI == ABI_AIX)
34918 const char *desc_name, *orig_name;
34920 orig_name = (*targetm.strip_name_encoding) (name);
34921 desc_name = orig_name;
34922 while (*desc_name == '.')
34923 desc_name++;
34925 if (TREE_PUBLIC (decl))
34926 fprintf (file, "\t.globl %s\n", desc_name);
34928 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34929 fprintf (file, "%s:\n", desc_name);
34930 fprintf (file, "\t.long %s\n", orig_name);
34931 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
34932 fputs ("\t.long 0\n", file);
34933 fprintf (file, "\t.previous\n");
34935 ASM_OUTPUT_LABEL (file, name);
34938 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
34939 static void
34940 rs6000_elf_file_end (void)
34942 #ifdef HAVE_AS_GNU_ATTRIBUTE
34943 /* ??? The value emitted depends on options active at file end.
34944 Assume anyone using #pragma or attributes that might change
34945 options knows what they are doing. */
34946 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
34947 && rs6000_passes_float)
34949 int fp;
34951 if (TARGET_DF_FPR | TARGET_DF_SPE)
34952 fp = 1;
34953 else if (TARGET_SF_FPR | TARGET_SF_SPE)
34954 fp = 3;
34955 else
34956 fp = 2;
34957 if (rs6000_passes_long_double)
34959 if (!TARGET_LONG_DOUBLE_128)
34960 fp |= 2 * 4;
34961 else if (TARGET_IEEEQUAD)
34962 fp |= 3 * 4;
34963 else
34964 fp |= 1 * 4;
34966 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
34968 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
34970 if (rs6000_passes_vector)
34971 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
34972 (TARGET_ALTIVEC_ABI ? 2
34973 : TARGET_SPE_ABI ? 3
34974 : 1));
34975 if (rs6000_returns_struct)
34976 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
34977 aix_struct_return ? 2 : 1);
34979 #endif
34980 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
34981 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
34982 file_end_indicate_exec_stack ();
34983 #endif
34985 if (flag_split_stack)
34986 file_end_indicate_split_stack ();
34988 if (cpu_builtin_p)
34990 /* We have expanded a CPU builtin, so we need to emit a reference to
34991 the special symbol that LIBC uses to declare that it supports the
34992 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
34993 switch_to_section (data_section);
34994 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
34995 fprintf (asm_out_file, "\t%s %s\n",
34996 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
34999 #endif
35001 #if TARGET_XCOFF
35003 #ifndef HAVE_XCOFF_DWARF_EXTRAS
35004 #define HAVE_XCOFF_DWARF_EXTRAS 0
35005 #endif
35007 static enum unwind_info_type
35008 rs6000_xcoff_debug_unwind_info (void)
35010 return UI_NONE;
35013 static void
35014 rs6000_xcoff_asm_output_anchor (rtx symbol)
35016 char buffer[100];
35018 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
35019 SYMBOL_REF_BLOCK_OFFSET (symbol));
35020 fprintf (asm_out_file, "%s", SET_ASM_OP);
35021 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
35022 fprintf (asm_out_file, ",");
35023 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
35024 fprintf (asm_out_file, "\n");
35027 static void
35028 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
35030 fputs (GLOBAL_ASM_OP, stream);
35031 RS6000_OUTPUT_BASENAME (stream, name);
35032 putc ('\n', stream);
35035 /* A get_unnamed_section callback, used for read-only sections.
35036 DIRECTIVE points to the section name string variable. */
35038 static void
35039 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
35041 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
35042 *(const char *const *) directive,
35043 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35046 /* Likewise for read-write sections. */
35048 static void
35049 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
35051 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
35052 *(const char *const *) directive,
35053 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35056 static void
35057 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
35059 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
35060 *(const char *const *) directive,
35061 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
35064 /* A get_unnamed_section callback, used for switching to toc_section. */
35066 static void
35067 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35069 if (TARGET_MINIMAL_TOC)
35071 /* toc_section is always selected at least once from
35072 rs6000_xcoff_file_start, so this is guaranteed to
35073 always be defined once and only once in each file. */
35074 if (!toc_initialized)
35076 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
35077 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
35078 toc_initialized = 1;
35080 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
35081 (TARGET_32BIT ? "" : ",3"));
35083 else
35084 fputs ("\t.toc\n", asm_out_file);
35087 /* Implement TARGET_ASM_INIT_SECTIONS. */
35089 static void
35090 rs6000_xcoff_asm_init_sections (void)
35092 read_only_data_section
35093 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35094 &xcoff_read_only_section_name);
35096 private_data_section
35097 = get_unnamed_section (SECTION_WRITE,
35098 rs6000_xcoff_output_readwrite_section_asm_op,
35099 &xcoff_private_data_section_name);
35101 tls_data_section
35102 = get_unnamed_section (SECTION_TLS,
35103 rs6000_xcoff_output_tls_section_asm_op,
35104 &xcoff_tls_data_section_name);
35106 tls_private_data_section
35107 = get_unnamed_section (SECTION_TLS,
35108 rs6000_xcoff_output_tls_section_asm_op,
35109 &xcoff_private_data_section_name);
35111 read_only_private_data_section
35112 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35113 &xcoff_private_data_section_name);
35115 toc_section
35116 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
35118 readonly_data_section = read_only_data_section;
35121 static int
35122 rs6000_xcoff_reloc_rw_mask (void)
35124 return 3;
35127 static void
35128 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
35129 tree decl ATTRIBUTE_UNUSED)
35131 int smclass;
35132 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
35134 if (flags & SECTION_EXCLUDE)
35135 smclass = 4;
35136 else if (flags & SECTION_DEBUG)
35138 fprintf (asm_out_file, "\t.dwsect %s\n", name);
35139 return;
35141 else if (flags & SECTION_CODE)
35142 smclass = 0;
35143 else if (flags & SECTION_TLS)
35144 smclass = 3;
35145 else if (flags & SECTION_WRITE)
35146 smclass = 2;
35147 else
35148 smclass = 1;
35150 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
35151 (flags & SECTION_CODE) ? "." : "",
35152 name, suffix[smclass], flags & SECTION_ENTSIZE);
35155 #define IN_NAMED_SECTION(DECL) \
35156 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
35157 && DECL_SECTION_NAME (DECL) != NULL)
35159 static section *
35160 rs6000_xcoff_select_section (tree decl, int reloc,
35161 unsigned HOST_WIDE_INT align)
35163 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
35164 a named section. */
35165 if (align > BIGGEST_ALIGNMENT)
35167 resolve_unique_section (decl, reloc, true);
35168 if (IN_NAMED_SECTION (decl))
35169 return get_named_section (decl, NULL, reloc);
35172 if (decl_readonly_section (decl, reloc))
35174 if (TREE_PUBLIC (decl))
35175 return read_only_data_section;
35176 else
35177 return read_only_private_data_section;
35179 else
35181 #if HAVE_AS_TLS
35182 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35184 if (TREE_PUBLIC (decl))
35185 return tls_data_section;
35186 else if (bss_initializer_p (decl))
35188 /* Convert to COMMON to emit in BSS. */
35189 DECL_COMMON (decl) = 1;
35190 return tls_comm_section;
35192 else
35193 return tls_private_data_section;
35195 else
35196 #endif
35197 if (TREE_PUBLIC (decl))
35198 return data_section;
35199 else
35200 return private_data_section;
35204 static void
35205 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
35207 const char *name;
35209 /* Use select_section for private data and uninitialized data with
35210 alignment <= BIGGEST_ALIGNMENT. */
35211 if (!TREE_PUBLIC (decl)
35212 || DECL_COMMON (decl)
35213 || (DECL_INITIAL (decl) == NULL_TREE
35214 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
35215 || DECL_INITIAL (decl) == error_mark_node
35216 || (flag_zero_initialized_in_bss
35217 && initializer_zerop (DECL_INITIAL (decl))))
35218 return;
35220 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35221 name = (*targetm.strip_name_encoding) (name);
35222 set_decl_section_name (decl, name);
35225 /* Select section for constant in constant pool.
35227 On RS/6000, all constants are in the private read-only data area.
35228 However, if this is being placed in the TOC it must be output as a
35229 toc entry. */
35231 static section *
35232 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
35233 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
35235 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35236 return toc_section;
35237 else
35238 return read_only_private_data_section;
35241 /* Remove any trailing [DS] or the like from the symbol name. */
35243 static const char *
35244 rs6000_xcoff_strip_name_encoding (const char *name)
35246 size_t len;
35247 if (*name == '*')
35248 name++;
35249 len = strlen (name);
35250 if (name[len - 1] == ']')
35251 return ggc_alloc_string (name, len - 4);
35252 else
35253 return name;
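/* A standalone model of the stripping rule above (illustration only;
   strip_mapping_class is an invented name and plain malloc stands in for
   the GC string allocator).  The fixed len - 4 assumes every trailing ']'
   closes a four-character mapping-class suffix such as "[DS]" or "[RW]".  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
strip_mapping_class (const char *name)
{
  size_t len;
  char *out;
  if (*name == '*')
    name++;
  len = strlen (name);
  if (len >= 4 && name[len - 1] == ']')
    len -= 4;			/* drop the "[XX]" suffix */
  out = (char *) malloc (len + 1);
  memcpy (out, name, len);
  out[len] = '\0';
  return out;
}

int
main (void)
{
  char *s = strip_mapping_class ("*foo[DS]");
  printf ("%s\n", s);		/* prints "foo" */
  free (s);
  return 0;
}
#endif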
35256 /* Section attributes. AIX is always PIC. */
35258 static unsigned int
35259 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
35261 unsigned int align;
35262 unsigned int flags = default_section_type_flags (decl, name, reloc);
35264 /* Align to at least UNIT size. */
35265 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
35266 align = MIN_UNITS_PER_WORD;
35267 else
35268 /* Increase alignment of large objects if not already stricter. */
35269 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
35270 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
35271 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
35273 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
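/* A small model of the flags encoding above (illustration only; the 0xff
   entsize mask and the 0x1000 flag bit are assumed values for this sketch,
   the real masks live in output.h).  The log2 of the alignment is packed
   into the SECTION_ENTSIZE field and can be recovered later as 1 << value.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
#define SECTION_ENTSIZE 0xff	/* assumed mask for this sketch */

static unsigned int
ilog2 (unsigned int x)		/* exact_log2 stand-in, X a power of two */
{
  unsigned int n = 0;
  while (x > 1)
    {
      x >>= 1;
      n++;
    }
  return n;
}

int
main (void)
{
  unsigned int flags = 0x1000;	/* some pre-existing section flags */
  unsigned int align = 16;	/* bytes */
  flags |= ilog2 (align) & SECTION_ENTSIZE;
  printf ("%#x -> align %u\n", flags, 1u << (flags & SECTION_ENTSIZE));
  return 0;			/* prints "0x1004 -> align 16" */
}
#endif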
35276 /* Output at beginning of assembler file.
35278 Initialize the section names for the RS/6000 at this point.
35280 Specify filename, including full path, to assembler.
35282 We want to go into the TOC section so at least one .toc will be emitted.
35283 Also, in order to output proper .bs/.es pairs, we need at least one static
35284 [RW] section emitted.
35286 Finally, declare mcount when profiling to make the assembler happy. */
35288 static void
35289 rs6000_xcoff_file_start (void)
35291 rs6000_gen_section_name (&xcoff_bss_section_name,
35292 main_input_filename, ".bss_");
35293 rs6000_gen_section_name (&xcoff_private_data_section_name,
35294 main_input_filename, ".rw_");
35295 rs6000_gen_section_name (&xcoff_read_only_section_name,
35296 main_input_filename, ".ro_");
35297 rs6000_gen_section_name (&xcoff_tls_data_section_name,
35298 main_input_filename, ".tls_");
35299 rs6000_gen_section_name (&xcoff_tbss_section_name,
35300 main_input_filename, ".tbss_[UL]");
35302 fputs ("\t.file\t", asm_out_file);
35303 output_quoted_string (asm_out_file, main_input_filename);
35304 fputc ('\n', asm_out_file);
35305 if (write_symbols != NO_DEBUG)
35306 switch_to_section (private_data_section);
35307 switch_to_section (toc_section);
35308 switch_to_section (text_section);
35309 if (profile_flag)
35310 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
35311 rs6000_file_start ();
35314 /* Output at end of assembler file.
35315 On the RS/6000, referencing data should automatically pull in text. */
35317 static void
35318 rs6000_xcoff_file_end (void)
35320 switch_to_section (text_section);
35321 fputs ("_section_.text:\n", asm_out_file);
35322 switch_to_section (data_section);
35323 fputs (TARGET_32BIT
35324 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
35325 asm_out_file);
35328 struct declare_alias_data
35330 FILE *file;
35331 bool function_descriptor;
35334 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
35336 static bool
35337 rs6000_declare_alias (struct symtab_node *n, void *d)
35339 struct declare_alias_data *data = (struct declare_alias_data *)d;
35340 /* The main symbol is output specially, because the varasm machinery does
35341 part of the job for us; we do not need to emit .globl/.lglobl and such. */
35342 if (!n->alias || n->weakref)
35343 return false;
35345 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
35346 return false;
35348 /* Prevent assemble_alias from trying to use .set pseudo operation
35349 that does not behave as expected by the middle-end. */
35350 TREE_ASM_WRITTEN (n->decl) = true;
35352 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
35353 char *buffer = (char *) alloca (strlen (name) + 2);
35354 char *p;
35355 int dollar_inside = 0;
35357 strcpy (buffer, name);
35358 p = strchr (buffer, '$');
35359 while (p) {
35360 *p = '_';
35361 dollar_inside++;
35362 p = strchr (p + 1, '$');
35364 if (TREE_PUBLIC (n->decl))
35366 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
35368 if (dollar_inside) {
35369 if (data->function_descriptor)
35370 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35371 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35373 if (data->function_descriptor)
35375 fputs ("\t.globl .", data->file);
35376 RS6000_OUTPUT_BASENAME (data->file, buffer);
35377 putc ('\n', data->file);
35379 fputs ("\t.globl ", data->file);
35380 RS6000_OUTPUT_BASENAME (data->file, buffer);
35381 putc ('\n', data->file);
35383 #ifdef ASM_WEAKEN_DECL
35384 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
35385 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
35386 #endif
35388 else
35390 if (dollar_inside)
35392 if (data->function_descriptor)
35393 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35394 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35396 if (data->function_descriptor)
35398 fputs ("\t.lglobl .", data->file);
35399 RS6000_OUTPUT_BASENAME (data->file, buffer);
35400 putc ('\n', data->file);
35402 fputs ("\t.lglobl ", data->file);
35403 RS6000_OUTPUT_BASENAME (data->file, buffer);
35404 putc ('\n', data->file);
35406 if (data->function_descriptor)
35407 fputs (".", data->file);
35408 RS6000_OUTPUT_BASENAME (data->file, buffer);
35409 fputs (":\n", data->file);
35410 return false;
35414 #ifdef HAVE_GAS_HIDDEN
35415 /* Helper function to calculate visibility of a DECL
35416 and return the value as a const string. */
35418 static const char *
35419 rs6000_xcoff_visibility (tree decl)
35421 static const char * const visibility_types[] = {
35422 "", ",protected", ",hidden", ",internal"
35425 enum symbol_visibility vis = DECL_VISIBILITY (decl);
35427 if (TREE_CODE (decl) == FUNCTION_DECL
35428 && cgraph_node::get (decl)
35429 && cgraph_node::get (decl)->instrumentation_clone
35430 && cgraph_node::get (decl)->instrumented_version)
35431 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
35433 return visibility_types[vis];
35435 #endif
35438 /* This macro produces the initial definition of a function name.
35439 On the RS/6000, we need to place an extra '.' in the function name and
35440 output the function descriptor.
35441 Dollar signs are converted to underscores.
35443 The csect for the function will have already been created when
35444 text_section was selected. We do have to go back to that csect, however.
35446 The third and fourth parameters to the .function pseudo-op (16 and 044)
35447 are placeholders which no longer have any use.
35449 Because AIX assembler's .set command has unexpected semantics, we output
35450 all aliases as alternative labels in front of the definition. */
35452 void
35453 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
35455 char *buffer = (char *) alloca (strlen (name) + 1);
35456 char *p;
35457 int dollar_inside = 0;
35458 struct declare_alias_data data = {file, false};
35460 strcpy (buffer, name);
35461 p = strchr (buffer, '$');
35462 while (p) {
35463 *p = '_';
35464 dollar_inside++;
35465 p = strchr (p + 1, '$');
35467 if (TREE_PUBLIC (decl))
35469 if (!RS6000_WEAK || !DECL_WEAK (decl))
35471 if (dollar_inside) {
35472 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35473 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35475 fputs ("\t.globl .", file);
35476 RS6000_OUTPUT_BASENAME (file, buffer);
35477 #ifdef HAVE_GAS_HIDDEN
35478 fputs (rs6000_xcoff_visibility (decl), file);
35479 #endif
35480 putc ('\n', file);
35483 else
35485 if (dollar_inside) {
35486 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35487 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35489 fputs ("\t.lglobl .", file);
35490 RS6000_OUTPUT_BASENAME (file, buffer);
35491 putc ('\n', file);
35493 fputs ("\t.csect ", file);
35494 RS6000_OUTPUT_BASENAME (file, buffer);
35495 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
35496 RS6000_OUTPUT_BASENAME (file, buffer);
35497 fputs (":\n", file);
35498 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35499 &data, true);
35500 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
35501 RS6000_OUTPUT_BASENAME (file, buffer);
35502 fputs (", TOC[tc0], 0\n", file);
35503 in_section = NULL;
35504 switch_to_section (function_section (decl));
35505 putc ('.', file);
35506 RS6000_OUTPUT_BASENAME (file, buffer);
35507 fputs (":\n", file);
35508 data.function_descriptor = true;
35509 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35510 &data, true);
35511 if (!DECL_IGNORED_P (decl))
35513 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35514 xcoffout_declare_function (file, decl, buffer);
35515 else if (write_symbols == DWARF2_DEBUG)
35517 name = (*targetm.strip_name_encoding) (name);
35518 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
35521 return;
35525 /* Output assembly language to globalize a symbol from a DECL,
35526 possibly with visibility. */
35528 void
35529 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
35531 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
35532 fputs (GLOBAL_ASM_OP, stream);
35533 RS6000_OUTPUT_BASENAME (stream, name);
35534 #ifdef HAVE_GAS_HIDDEN
35535 fputs (rs6000_xcoff_visibility (decl), stream);
35536 #endif
35537 putc ('\n', stream);
35540 /* Output assembly language to define a symbol as COMMON from a DECL,
35541 possibly with visibility. */
35543 void
35544 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
35545 tree decl ATTRIBUTE_UNUSED,
35546 const char *name,
35547 unsigned HOST_WIDE_INT size,
35548 unsigned HOST_WIDE_INT align)
35550 unsigned HOST_WIDE_INT align2 = 2;
35552 if (align > 32)
35553 align2 = floor_log2 (align / BITS_PER_UNIT);
35554 else if (size > 4)
35555 align2 = 3;
35557 fputs (COMMON_ASM_OP, stream);
35558 RS6000_OUTPUT_BASENAME (stream, name);
35560 fprintf (stream,
35561 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
35562 size, align2);
35564 #ifdef HAVE_GAS_HIDDEN
35565 fputs (rs6000_xcoff_visibility (decl), stream);
35566 #endif
35567 putc ('\n', stream);
35570 /* This macro produces the initial definition of an object (variable) name.
35571 Because AIX assembler's .set command has unexpected semantics, we output
35572 all aliases as alternative labels in front of the definition. */
35574 void
35575 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
35577 struct declare_alias_data data = {file, false};
35578 RS6000_OUTPUT_BASENAME (file, name);
35579 fputs (":\n", file);
35580 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35581 &data, true);
35584 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
35586 void
35587 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
35589 fputs (integer_asm_op (size, FALSE), file);
35590 assemble_name (file, label);
35591 fputs ("-$", file);
35594 /* Output a symbol offset relative to the dbase for the current object.
35595 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
35596 signed offsets.
35598 __gcc_unwind_dbase is embedded in all executables/libraries through
35599 libgcc/config/rs6000/crtdbase.S. */
35601 void
35602 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
35604 fputs (integer_asm_op (size, FALSE), file);
35605 assemble_name (file, label);
35606 fputs("-__gcc_unwind_dbase", file);
35609 #ifdef HAVE_AS_TLS
35610 static void
35611 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
35613 rtx symbol;
35614 int flags;
35615 const char *symname;
35617 default_encode_section_info (decl, rtl, first);
35619 /* Careful not to prod global register variables. */
35620 if (!MEM_P (rtl))
35621 return;
35622 symbol = XEXP (rtl, 0);
35623 if (GET_CODE (symbol) != SYMBOL_REF)
35624 return;
35626 flags = SYMBOL_REF_FLAGS (symbol);
35628 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35629 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
35631 SYMBOL_REF_FLAGS (symbol) = flags;
35633 /* Append mapping class to extern decls. */
35634 symname = XSTR (symbol, 0);
35635 if (decl /* sync condition with assemble_external () */
35636 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
35637 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
35638 || TREE_CODE (decl) == FUNCTION_DECL)
35639 && symname[strlen (symname) - 1] != ']')
35641 char *newname = (char *) alloca (strlen (symname) + 5);
35642 strcpy (newname, symname);
35643 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
35644 ? "[DS]" : "[UA]"));
35645 XSTR (symbol, 0) = ggc_strdup (newname);
35648 #endif /* HAVE_AS_TLS */
35649 #endif /* TARGET_XCOFF */
35651 void
35652 rs6000_asm_weaken_decl (FILE *stream, tree decl,
35653 const char *name, const char *val)
35655 fputs ("\t.weak\t", stream);
35656 RS6000_OUTPUT_BASENAME (stream, name);
35657 if (decl && TREE_CODE (decl) == FUNCTION_DECL
35658 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
35660 if (TARGET_XCOFF)
35661 fputs ("[DS]", stream);
35662 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
35663 if (TARGET_XCOFF)
35664 fputs (rs6000_xcoff_visibility (decl), stream);
35665 #endif
35666 fputs ("\n\t.weak\t.", stream);
35667 RS6000_OUTPUT_BASENAME (stream, name);
35669 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
35670 if (TARGET_XCOFF)
35671 fputs (rs6000_xcoff_visibility (decl), stream);
35672 #endif
35673 fputc ('\n', stream);
35674 if (val)
35676 ASM_OUTPUT_DEF (stream, name, val);
35677 if (decl && TREE_CODE (decl) == FUNCTION_DECL
35678 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
35680 fputs ("\t.set\t.", stream);
35681 RS6000_OUTPUT_BASENAME (stream, name);
35682 fputs (",.", stream);
35683 RS6000_OUTPUT_BASENAME (stream, val);
35684 fputc ('\n', stream);
35690 /* Return true if INSN should not be copied. */
35692 static bool
35693 rs6000_cannot_copy_insn_p (rtx_insn *insn)
35695 return recog_memoized (insn) >= 0
35696 && get_attr_cannot_copy (insn);
35699 /* Compute a (partial) cost for rtx X. Return true if the complete
35700 cost has been computed, and false if subexpressions should be
35701 scanned. In either case, *TOTAL contains the cost result. */
35703 static bool
35704 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
35705 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
35707 int code = GET_CODE (x);
35709 switch (code)
35711 /* On the RS/6000, if it is valid in the insn, it is free. */
35712 case CONST_INT:
35713 if (((outer_code == SET
35714 || outer_code == PLUS
35715 || outer_code == MINUS)
35716 && (satisfies_constraint_I (x)
35717 || satisfies_constraint_L (x)))
35718 || (outer_code == AND
35719 && (satisfies_constraint_K (x)
35720 || (mode == SImode
35721 ? satisfies_constraint_L (x)
35722 : satisfies_constraint_J (x))))
35723 || ((outer_code == IOR || outer_code == XOR)
35724 && (satisfies_constraint_K (x)
35725 || (mode == SImode
35726 ? satisfies_constraint_L (x)
35727 : satisfies_constraint_J (x))))
35728 || outer_code == ASHIFT
35729 || outer_code == ASHIFTRT
35730 || outer_code == LSHIFTRT
35731 || outer_code == ROTATE
35732 || outer_code == ROTATERT
35733 || outer_code == ZERO_EXTRACT
35734 || (outer_code == MULT
35735 && satisfies_constraint_I (x))
35736 || ((outer_code == DIV || outer_code == UDIV
35737 || outer_code == MOD || outer_code == UMOD)
35738 && exact_log2 (INTVAL (x)) >= 0)
35739 || (outer_code == COMPARE
35740 && (satisfies_constraint_I (x)
35741 || satisfies_constraint_K (x)))
35742 || ((outer_code == EQ || outer_code == NE)
35743 && (satisfies_constraint_I (x)
35744 || satisfies_constraint_K (x)
35745 || (mode == SImode
35746 ? satisfies_constraint_L (x)
35747 : satisfies_constraint_J (x))))
35748 || (outer_code == GTU
35749 && satisfies_constraint_I (x))
35750 || (outer_code == LTU
35751 && satisfies_constraint_P (x)))
35753 *total = 0;
35754 return true;
35756 else if ((outer_code == PLUS
35757 && reg_or_add_cint_operand (x, VOIDmode))
35758 || (outer_code == MINUS
35759 && reg_or_sub_cint_operand (x, VOIDmode))
35760 || ((outer_code == SET
35761 || outer_code == IOR
35762 || outer_code == XOR)
35763 && (INTVAL (x)
35764 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
35766 *total = COSTS_N_INSNS (1);
35767 return true;
35769 /* FALLTHRU */
35771 case CONST_DOUBLE:
35772 case CONST_WIDE_INT:
35773 case CONST:
35774 case HIGH:
35775 case SYMBOL_REF:
35776 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35777 return true;
35779 case MEM:
35780 /* When optimizing for size, MEM should be slightly more expensive
35781 than generating the address, e.g., (plus (reg) (const)).
35782 L1 cache latency is about two instructions. */
35783 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35784 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
35785 *total += COSTS_N_INSNS (100);
35786 return true;
35788 case LABEL_REF:
35789 *total = 0;
35790 return true;
35792 case PLUS:
35793 case MINUS:
35794 if (FLOAT_MODE_P (mode))
35795 *total = rs6000_cost->fp;
35796 else
35797 *total = COSTS_N_INSNS (1);
35798 return false;
35800 case MULT:
35801 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35802 && satisfies_constraint_I (XEXP (x, 1)))
35804 if (INTVAL (XEXP (x, 1)) >= -256
35805 && INTVAL (XEXP (x, 1)) <= 255)
35806 *total = rs6000_cost->mulsi_const9;
35807 else
35808 *total = rs6000_cost->mulsi_const;
35810 else if (mode == SFmode)
35811 *total = rs6000_cost->fp;
35812 else if (FLOAT_MODE_P (mode))
35813 *total = rs6000_cost->dmul;
35814 else if (mode == DImode)
35815 *total = rs6000_cost->muldi;
35816 else
35817 *total = rs6000_cost->mulsi;
35818 return false;
35820 case FMA:
35821 if (mode == SFmode)
35822 *total = rs6000_cost->fp;
35823 else
35824 *total = rs6000_cost->dmul;
35825 break;
35827 case DIV:
35828 case MOD:
35829 if (FLOAT_MODE_P (mode))
35831 *total = mode == DFmode ? rs6000_cost->ddiv
35832 : rs6000_cost->sdiv;
35833 return false;
35835 /* FALLTHRU */
35837 case UDIV:
35838 case UMOD:
35839 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35840 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
35842 if (code == DIV || code == MOD)
35843 /* Shift, addze */
35844 *total = COSTS_N_INSNS (2);
35845 else
35846 /* Shift */
35847 *total = COSTS_N_INSNS (1);
35849 else
35851 if (GET_MODE (XEXP (x, 1)) == DImode)
35852 *total = rs6000_cost->divdi;
35853 else
35854 *total = rs6000_cost->divsi;
35856 /* Add in shift and subtract for MOD unless we have a mod instruction. */
35857 if (!TARGET_MODULO && (code == MOD || code == UMOD))
35858 *total += COSTS_N_INSNS (2);
35859 return false;
35861 case CTZ:
35862 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
35863 return false;
35865 case FFS:
35866 *total = COSTS_N_INSNS (4);
35867 return false;
35869 case POPCOUNT:
35870 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
35871 return false;
35873 case PARITY:
35874 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
35875 return false;
35877 case NOT:
35878 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
35879 *total = 0;
35880 else
35881 *total = COSTS_N_INSNS (1);
35882 return false;
35884 case AND:
35885 if (CONST_INT_P (XEXP (x, 1)))
35887 rtx left = XEXP (x, 0);
35888 rtx_code left_code = GET_CODE (left);
35890 /* rotate-and-mask: 1 insn. */
35891 if ((left_code == ROTATE
35892 || left_code == ASHIFT
35893 || left_code == LSHIFTRT)
35894 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
35896 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
35897 if (!CONST_INT_P (XEXP (left, 1)))
35898 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
35899 *total += COSTS_N_INSNS (1);
35900 return true;
35903 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
35904 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
35905 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
35906 || (val & 0xffff) == val
35907 || (val & 0xffff0000) == val
35908 || ((val & 0xffff) == 0 && mode == SImode))
35910 *total = rtx_cost (left, mode, AND, 0, speed);
35911 *total += COSTS_N_INSNS (1);
35912 return true;
35915 /* 2 insns. */
35916 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
35918 *total = rtx_cost (left, mode, AND, 0, speed);
35919 *total += COSTS_N_INSNS (2);
35920 return true;
35924 *total = COSTS_N_INSNS (1);
35925 return false;
35927 case IOR:
35928 /* FIXME */
35929 *total = COSTS_N_INSNS (1);
35930 return true;
35932 case CLZ:
35933 case XOR:
35934 case ZERO_EXTRACT:
35935 *total = COSTS_N_INSNS (1);
35936 return false;
35938 case ASHIFT:
35939 /* The EXTSWSLI instruction is a combined instruction. Don't count the
35940 sign extend and the shift separately within the insn. */
35941 if (TARGET_EXTSWSLI && mode == DImode
35942 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
35943 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
35945 *total = 0;
35946 return false;
35948 /* fall through */
35950 case ASHIFTRT:
35951 case LSHIFTRT:
35952 case ROTATE:
35953 case ROTATERT:
35954 /* Handle mul_highpart. */
35955 if (outer_code == TRUNCATE
35956 && GET_CODE (XEXP (x, 0)) == MULT)
35958 if (mode == DImode)
35959 *total = rs6000_cost->muldi;
35960 else
35961 *total = rs6000_cost->mulsi;
35962 return true;
35964 else if (outer_code == AND)
35965 *total = 0;
35966 else
35967 *total = COSTS_N_INSNS (1);
35968 return false;
35970 case SIGN_EXTEND:
35971 case ZERO_EXTEND:
35972 if (GET_CODE (XEXP (x, 0)) == MEM)
35973 *total = 0;
35974 else
35975 *total = COSTS_N_INSNS (1);
35976 return false;
35978 case COMPARE:
35979 case NEG:
35980 case ABS:
35981 if (!FLOAT_MODE_P (mode))
35983 *total = COSTS_N_INSNS (1);
35984 return false;
35986 /* FALLTHRU */
35988 case FLOAT:
35989 case UNSIGNED_FLOAT:
35990 case FIX:
35991 case UNSIGNED_FIX:
35992 case FLOAT_TRUNCATE:
35993 *total = rs6000_cost->fp;
35994 return false;
35996 case FLOAT_EXTEND:
35997 if (mode == DFmode)
35998 *total = rs6000_cost->sfdf_convert;
35999 else
36000 *total = rs6000_cost->fp;
36001 return false;
36003 case UNSPEC:
36004 switch (XINT (x, 1))
36006 case UNSPEC_FRSP:
36007 *total = rs6000_cost->fp;
36008 return true;
36010 default:
36011 break;
36013 break;
36015 case CALL:
36016 case IF_THEN_ELSE:
36017 if (!speed)
36019 *total = COSTS_N_INSNS (1);
36020 return true;
36022 else if (FLOAT_MODE_P (mode)
36023 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
36025 *total = rs6000_cost->fp;
36026 return false;
36028 break;
36030 case NE:
36031 case EQ:
36032 case GTU:
36033 case LTU:
36034 /* Carry bit requires mode == Pmode.
36035 NEG or PLUS already counted so only add one. */
36036 if (mode == Pmode
36037 && (outer_code == NEG || outer_code == PLUS))
36039 *total = COSTS_N_INSNS (1);
36040 return true;
36042 if (outer_code == SET)
36044 if (XEXP (x, 1) == const0_rtx)
36046 if (TARGET_ISEL && !TARGET_MFCRF)
36047 *total = COSTS_N_INSNS (8);
36048 else
36049 *total = COSTS_N_INSNS (2);
36050 return true;
36052 else
36054 *total = COSTS_N_INSNS (3);
36055 return false;
36058 /* FALLTHRU */
36060 case GT:
36061 case LT:
36062 case UNORDERED:
36063 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
36065 if (TARGET_ISEL && !TARGET_MFCRF)
36066 *total = COSTS_N_INSNS (8);
36067 else
36068 *total = COSTS_N_INSNS (2);
36069 return true;
36071 /* CC COMPARE. */
36072 if (outer_code == COMPARE)
36074 *total = 0;
36075 return true;
36077 break;
36079 default:
36080 break;
36083 return false;
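/* A toy model of the return-value contract above (illustration only; the
   expression type and the cost values are invented).  The hook returning
   true means *TOTAL is final; returning false tells the walker to add the
   subexpressions' costs itself.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>

struct expr { const char *op; int nkids; const struct expr *kid[2]; };

static int
toy_cost (const struct expr *e, int *total)
{
  if (e->nkids == 0)
    {
      *total = 0;		/* leaves are free and complete */
      return 1;
    }
  *total = 1;			/* one insn; still scan the operands */
  return 0;
}

static int
walk (const struct expr *e)
{
  int total, i;
  if (toy_cost (e, &total))
    return total;
  for (i = 0; i < e->nkids; i++)
    total += walk (e->kid[i]);
  return total;
}

int
main (void)
{
  const struct expr reg = { "reg", 0, { 0, 0 } };
  const struct expr cst = { "const_int", 0, { 0, 0 } };
  const struct expr plus = { "plus", 2, { &reg, &cst } };
  const struct expr mult = { "mult", 2, { &plus, &reg } };
  printf ("%d\n", walk (&mult));	/* 2: one for mult, one for plus */
  return 0;
}
#endif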
36086 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
36088 static bool
36089 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
36090 int opno, int *total, bool speed)
36092 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
36094 fprintf (stderr,
36095 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
36096 "opno = %d, total = %d, speed = %s, x:\n",
36097 ret ? "complete" : "scan inner",
36098 GET_MODE_NAME (mode),
36099 GET_RTX_NAME (outer_code),
36100 opno,
36101 *total,
36102 speed ? "true" : "false");
36104 debug_rtx (x);
36106 return ret;
36109 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
36111 static int
36112 rs6000_debug_address_cost (rtx x, machine_mode mode,
36113 addr_space_t as, bool speed)
36115 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
36117 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
36118 ret, speed ? "true" : "false");
36119 debug_rtx (x);
36121 return ret;
36125 /* A C expression returning the cost of moving data from a register of class
36126 CLASS1 to one of CLASS2. */
36128 static int
36129 rs6000_register_move_cost (machine_mode mode,
36130 reg_class_t from, reg_class_t to)
36132 int ret;
36134 if (TARGET_DEBUG_COST)
36135 dbg_cost_ctrl++;
36137 /* Moves from/to GENERAL_REGS. */
36138 if (reg_classes_intersect_p (to, GENERAL_REGS)
36139 || reg_classes_intersect_p (from, GENERAL_REGS))
36141 reg_class_t rclass = from;
36143 if (! reg_classes_intersect_p (to, GENERAL_REGS))
36144 rclass = to;
36146 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
36147 ret = (rs6000_memory_move_cost (mode, rclass, false)
36148 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
36150 /* It's more expensive to move CR_REGS than CR0_REGS because of the
36151 shift. */
36152 else if (rclass == CR_REGS)
36153 ret = 4;
36155 /* For those processors that have slow LR/CTR moves, make them more
36156 expensive than memory in order to bias spills to memory. */
36157 else if ((rs6000_cpu == PROCESSOR_POWER6
36158 || rs6000_cpu == PROCESSOR_POWER7
36159 || rs6000_cpu == PROCESSOR_POWER8
36160 || rs6000_cpu == PROCESSOR_POWER9)
36161 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
36162 ret = 6 * hard_regno_nregs[0][mode];
36164 else
36165 /* A move will cost one instruction per GPR moved. */
36166 ret = 2 * hard_regno_nregs[0][mode];
36169 /* If we have VSX, we can easily move between FPR or Altivec registers. */
36170 else if (VECTOR_MEM_VSX_P (mode)
36171 && reg_classes_intersect_p (to, VSX_REGS)
36172 && reg_classes_intersect_p (from, VSX_REGS))
36173 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
36175 /* Moving between two similar registers is just one instruction. */
36176 else if (reg_classes_intersect_p (to, from))
36177 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
36179 /* Everything else has to go through GENERAL_REGS. */
36180 else
36181 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
36182 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
36184 if (TARGET_DEBUG_COST)
36186 if (dbg_cost_ctrl == 1)
36187 fprintf (stderr,
36188 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
36189 ret, GET_MODE_NAME (mode), reg_class_names[from],
36190 reg_class_names[to]);
36191 dbg_cost_ctrl--;
36194 return ret;
36197 /* A C expression returning the cost of moving data of MODE from a register to
36198 or from memory. */
36200 static int
36201 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
36202 bool in ATTRIBUTE_UNUSED)
36204 int ret;
36206 if (TARGET_DEBUG_COST)
36207 dbg_cost_ctrl++;
36209 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
36210 ret = 4 * hard_regno_nregs[0][mode];
36211 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
36212 || reg_classes_intersect_p (rclass, VSX_REGS)))
36213 ret = 4 * hard_regno_nregs[32][mode];
36214 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
36215 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
36216 else
36217 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
36219 if (TARGET_DEBUG_COST)
36221 if (dbg_cost_ctrl == 1)
36222 fprintf (stderr,
36223 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
36224 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
36225 dbg_cost_ctrl--;
36228 return ret;
36231 /* Returns a code for a target-specific builtin that implements
36232 reciprocal of the function, or NULL_TREE if not available. */
36234 static tree
36235 rs6000_builtin_reciprocal (tree fndecl)
36237 switch (DECL_FUNCTION_CODE (fndecl))
36239 case VSX_BUILTIN_XVSQRTDP:
36240 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
36241 return NULL_TREE;
36243 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
36245 case VSX_BUILTIN_XVSQRTSP:
36246 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
36247 return NULL_TREE;
36249 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
36251 default:
36252 return NULL_TREE;
36256 /* Load up a constant. If the mode is a vector mode, splat the value across
36257 all of the vector elements. */
36259 static rtx
36260 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
36262 rtx reg;
36264 if (mode == SFmode || mode == DFmode)
36266 rtx d = const_double_from_real_value (dconst, mode);
36267 reg = force_reg (mode, d);
36269 else if (mode == V4SFmode)
36271 rtx d = const_double_from_real_value (dconst, SFmode);
36272 rtvec v = gen_rtvec (4, d, d, d, d);
36273 reg = gen_reg_rtx (mode);
36274 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36276 else if (mode == V2DFmode)
36278 rtx d = const_double_from_real_value (dconst, DFmode);
36279 rtvec v = gen_rtvec (2, d, d);
36280 reg = gen_reg_rtx (mode);
36281 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36283 else
36284 gcc_unreachable ();
36286 return reg;
36289 /* Generate an FMA instruction. */
36291 static void
36292 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
36294 machine_mode mode = GET_MODE (target);
36295 rtx dst;
36297 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
36298 gcc_assert (dst != NULL);
36300 if (dst != target)
36301 emit_move_insn (target, dst);
36304 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
36306 static void
36307 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
36309 machine_mode mode = GET_MODE (dst);
36310 rtx r;
36312 /* This is a tad more complicated, since the fnma_optab is for
36313 a different expression: fma(-m1, m2, a), which is the same
36314 thing except in the case of signed zeros.
36316 Fortunately we know that if FMA is supported that FNMSUB is
36317 also supported in the ISA. Just expand it directly. */
36319 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
36321 r = gen_rtx_NEG (mode, a);
36322 r = gen_rtx_FMA (mode, m1, m2, r);
36323 r = gen_rtx_NEG (mode, r);
36324 emit_insn (gen_rtx_SET (dst, r));
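/* A quick numeric check of the identity relied on above (illustration
   only): -fma(m1, m2, -a), the fnmsub form emitted here, and
   fma(-m1, m2, a), the fnma_optab form, both compute a - m1*m2 and differ
   only in the sign of a zero result.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
#include <math.h>
int
main (void)
{
  double m1 = 3.0, m2 = 4.0, a = 5.0;
  printf ("%g %g\n",
	  -fma (m1, m2, -a),	/* -(3*4 - 5) = -7 */
	  fma (-m1, m2, a));	/* -3*4 + 5   = -7 */
  return 0;
}
#endif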
36327 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
36328 add a reg_note saying that this was a division. Support both scalar and
36329 vector divide. Assumes no trapping math and finite arguments. */
36331 void
36332 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
36334 machine_mode mode = GET_MODE (dst);
36335 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
36336 int i;
36338 /* Low precision estimates guarantee 5 bits of accuracy. High
36339 precision estimates guarantee 14 bits of accuracy. SFmode
36340 requires 23 bits of accuracy. DFmode requires 52 bits of
36341 accuracy. Each pass at least doubles the accuracy, leading
36342 to the following. */
36343 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36344 if (mode == DFmode || mode == V2DFmode)
36345 passes++;
36347 enum insn_code code = optab_handler (smul_optab, mode);
36348 insn_gen_fn gen_mul = GEN_FCN (code);
36350 gcc_assert (code != CODE_FOR_nothing);
36352 one = rs6000_load_constant_and_splat (mode, dconst1);
36354 /* x0 = 1./d estimate */
36355 x0 = gen_reg_rtx (mode);
36356 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
36357 UNSPEC_FRES)));
36359 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
36360 if (passes > 1) {
36362 /* e0 = 1. - d * x0 */
36363 e0 = gen_reg_rtx (mode);
36364 rs6000_emit_nmsub (e0, d, x0, one);
36366 /* x1 = x0 + e0 * x0 */
36367 x1 = gen_reg_rtx (mode);
36368 rs6000_emit_madd (x1, e0, x0, x0);
36370 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
36371 ++i, xprev = xnext, eprev = enext) {
36373 /* enext = eprev * eprev */
36374 enext = gen_reg_rtx (mode);
36375 emit_insn (gen_mul (enext, eprev, eprev));
36377 /* xnext = xprev + enext * xprev */
36378 xnext = gen_reg_rtx (mode);
36379 rs6000_emit_madd (xnext, enext, xprev, xprev);
36382 } else
36383 xprev = x0;
36385 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
36387 /* u = n * xprev */
36388 u = gen_reg_rtx (mode);
36389 emit_insn (gen_mul (u, n, xprev));
36391 /* v = n - (d * u) */
36392 v = gen_reg_rtx (mode);
36393 rs6000_emit_nmsub (v, d, u, n);
36395 /* dst = (v * xprev) + u */
36396 rs6000_emit_madd (dst, v, xprev, u);
36398 if (note_p)
36399 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
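/* A plain-C model of the sequence emitted above (illustration only; the
   seed is a pretend 5-bit-accurate estimate such as fres would produce).
   Each pass of x += x*(1 - d*x) at least doubles the bits of accuracy; the
   last step folds in the numerator via the residual.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
int
main (void)
{
  double n = 1.0, d = 3.0;
  double x = 0.34375;		/* pretend estimate of 1/3, about 5 bits */
  double e, u, v;
  int i;
  for (i = 0; i < 4; i++)
    {
      e = 1.0 - d * x;		/* rs6000_emit_nmsub (e0, d, x0, one) */
      x = x + e * x;		/* rs6000_emit_madd (x1, e0, x0, x0) */
    }
  u = n * x;			/* u = n * xprev */
  v = n - d * u;		/* v = n - (d * u) */
  printf ("%.17g\n", v * x + u);	/* dst = (v * xprev) + u, ~1/3 */
  return 0;
}
#endif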
36402 /* Goldschmidt's Algorithm for single/double-precision floating point
36403 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
36405 void
36406 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
36408 machine_mode mode = GET_MODE (src);
36409 rtx e = gen_reg_rtx (mode);
36410 rtx g = gen_reg_rtx (mode);
36411 rtx h = gen_reg_rtx (mode);
36413 /* Low precision estimates guarantee 5 bits of accuracy. High
36414 precision estimates guarantee 14 bits of accuracy. SFmode
36415 requires 23 bits of accuracy. DFmode requires 52 bits of
36416 accuracy. Each pass at least doubles the accuracy, leading
36417 to the following. */
36418 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36419 if (mode == DFmode || mode == V2DFmode)
36420 passes++;
36422 int i;
36423 rtx mhalf;
36424 enum insn_code code = optab_handler (smul_optab, mode);
36425 insn_gen_fn gen_mul = GEN_FCN (code);
36427 gcc_assert (code != CODE_FOR_nothing);
36429 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
36431 /* e = rsqrt estimate */
36432 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
36433 UNSPEC_RSQRT)));
36435 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
36436 if (!recip)
36438 rtx zero = force_reg (mode, CONST0_RTX (mode));
36440 if (mode == SFmode)
36442 rtx target = emit_conditional_move (e, GT, src, zero, mode,
36443 e, zero, mode, 0);
36444 if (target != e)
36445 emit_move_insn (e, target);
36447 else
36449 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
36450 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
36454 /* g = sqrt estimate. */
36455 emit_insn (gen_mul (g, e, src));
36456 /* h = 1/(2*sqrt) estimate. */
36457 emit_insn (gen_mul (h, e, mhalf));
36459 if (recip)
36461 if (passes == 1)
36463 rtx t = gen_reg_rtx (mode);
36464 rs6000_emit_nmsub (t, g, h, mhalf);
36465 /* Apply the correction directly to the rsqrt estimate. */
36466 rs6000_emit_madd (dst, e, t, e);
36468 else
36470 for (i = 0; i < passes; i++)
36472 rtx t1 = gen_reg_rtx (mode);
36473 rtx g1 = gen_reg_rtx (mode);
36474 rtx h1 = gen_reg_rtx (mode);
36476 rs6000_emit_nmsub (t1, g, h, mhalf);
36477 rs6000_emit_madd (g1, g, t1, g);
36478 rs6000_emit_madd (h1, h, t1, h);
36480 g = g1;
36481 h = h1;
36483 /* Multiply h by 2 for the rsqrt result, since h converged to 1/(2*sqrt). */
36484 emit_insn (gen_add3_insn (dst, h, h));
36487 else
36489 rtx t = gen_reg_rtx (mode);
36490 rs6000_emit_nmsub (t, g, h, mhalf);
36491 rs6000_emit_madd (dst, g, t, g);
36494 return;
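/* A plain-C model of the Goldschmidt iteration above (illustration only;
   the seed is a pretend low-precision rsqrt estimate).  g = src*e tracks
   sqrt(src) and h = e/2 tracks 1/(2*sqrt(src)); each pass computes the
   residual t = 1/2 - g*h and applies it to both.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
int
main (void)
{
  double src = 2.0;
  double e = 0.703125;		/* pretend estimate of 1/sqrt(2) */
  double g = src * e;		/* sqrt estimate */
  double h = e * 0.5;		/* 1/(2*sqrt) estimate */
  double t;
  int i;
  for (i = 0; i < 4; i++)
    {
      t = 0.5 - g * h;		/* rs6000_emit_nmsub (t1, g, h, mhalf) */
      g = g + g * t;		/* rs6000_emit_madd (g1, g, t1, g) */
      h = h + h * t;		/* rs6000_emit_madd (h1, h, t1, h) */
    }
  printf ("%.17g %.17g\n", g, 2.0 * h);	/* ~sqrt(2) and ~1/sqrt(2) */
  return 0;
}
#endif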
36497 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
36498 (Power7) targets. DST is the target, and SRC is the argument operand. */
36500 void
36501 rs6000_emit_popcount (rtx dst, rtx src)
36503 machine_mode mode = GET_MODE (dst);
36504 rtx tmp1, tmp2;
36506 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
36507 if (TARGET_POPCNTD)
36509 if (mode == SImode)
36510 emit_insn (gen_popcntdsi2 (dst, src));
36511 else
36512 emit_insn (gen_popcntddi2 (dst, src));
36513 return;
36516 tmp1 = gen_reg_rtx (mode);
36518 if (mode == SImode)
36520 emit_insn (gen_popcntbsi2 (tmp1, src));
36521 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
36522 NULL_RTX, 0);
36523 tmp2 = force_reg (SImode, tmp2);
36524 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
36526 else
36528 emit_insn (gen_popcntbdi2 (tmp1, src));
36529 tmp2 = expand_mult (DImode, tmp1,
36530 GEN_INT ((HOST_WIDE_INT)
36531 0x01010101 << 32 | 0x01010101),
36532 NULL_RTX, 0);
36533 tmp2 = force_reg (DImode, tmp2);
36534 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
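/* A plain-C model of the fallback path above (illustration only;
   byte_counts is an invented stand-in for the popcntb instruction).
   Multiplying the per-byte counts by 0x01010101 sums every byte into the
   top byte, and no carries occur because each count is at most 8.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>

static unsigned int
byte_counts (unsigned int x)	/* popcntb stand-in */
{
  unsigned int r = 0, b, c;
  int i;
  for (i = 0; i < 4; i++)
    {
      b = (x >> (8 * i)) & 0xff;
      for (c = 0; b != 0; b >>= 1)
	c += b & 1;
      r |= c << (8 * i);
    }
  return r;
}

int
main (void)
{
  unsigned int x = 0xf00f0001;			/* 9 bits set */
  unsigned int counts = byte_counts (x);	/* 0x04040001 */
  printf ("%u\n", (counts * 0x01010101u) >> 24);	/* prints 9 */
  return 0;
}
#endif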
36539 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
36540 target, and SRC is the argument operand. */
36542 void
36543 rs6000_emit_parity (rtx dst, rtx src)
36545 machine_mode mode = GET_MODE (dst);
36546 rtx tmp;
36548 tmp = gen_reg_rtx (mode);
36550 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
36551 if (TARGET_CMPB)
36553 if (mode == SImode)
36555 emit_insn (gen_popcntbsi2 (tmp, src));
36556 emit_insn (gen_paritysi2_cmpb (dst, tmp));
36558 else
36560 emit_insn (gen_popcntbdi2 (tmp, src));
36561 emit_insn (gen_paritydi2_cmpb (dst, tmp));
36563 return;
36566 if (mode == SImode)
36568 /* Is mult+shift >= shift+xor+shift+xor? */
36569 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
36571 rtx tmp1, tmp2, tmp3, tmp4;
36573 tmp1 = gen_reg_rtx (SImode);
36574 emit_insn (gen_popcntbsi2 (tmp1, src));
36576 tmp2 = gen_reg_rtx (SImode);
36577 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
36578 tmp3 = gen_reg_rtx (SImode);
36579 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
36581 tmp4 = gen_reg_rtx (SImode);
36582 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
36583 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
36585 else
36586 rs6000_emit_popcount (tmp, src);
36587 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
36589 else
36591 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
36592 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
36594 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
36596 tmp1 = gen_reg_rtx (DImode);
36597 emit_insn (gen_popcntbdi2 (tmp1, src));
36599 tmp2 = gen_reg_rtx (DImode);
36600 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
36601 tmp3 = gen_reg_rtx (DImode);
36602 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
36604 tmp4 = gen_reg_rtx (DImode);
36605 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
36606 tmp5 = gen_reg_rtx (DImode);
36607 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
36609 tmp6 = gen_reg_rtx (DImode);
36610 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
36611 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
36613 else
36614 rs6000_emit_popcount (tmp, src);
36615 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
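/* A plain-C model of the shift/xor fallback above (illustration only,
   reusing the per-byte counts from the popcount sketch).  Parity only
   needs the low bit, so xor-folding the byte counts together is as good
   as summing them.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>
int
main (void)
{
  unsigned int counts = 0x04040001;	/* per-byte popcounts of 0xf00f0001 */
  unsigned int t = counts ^ (counts >> 16);
  t ^= t >> 8;
  printf ("%u\n", t & 1);	/* 9 set bits, so parity is 1 */
  return 0;
}
#endif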
36619 /* Expand an Altivec constant permutation for little endian mode.
36620 There are two issues: First, the two input operands must be
36621 swapped so that together they form a double-wide array in LE
36622 order. Second, the vperm instruction has surprising behavior
36623 in LE mode: it interprets the elements of the source vectors
36624 in BE mode ("left to right") and interprets the elements of
36625 the destination vector in LE mode ("right to left"). To
36626 correct for this, we must subtract each element of the permute
36627 control vector from 31.
36629 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
36630 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
36631 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
36632 serve as the permute control vector. Then, in BE mode,
36634 vperm 9,10,11,12
36636 places the desired result in vr9. However, in LE mode the
36637 vector contents will be
36639 vr10 = 00000003 00000002 00000001 00000000
36640 vr11 = 00000007 00000006 00000005 00000004
36642 The result of the vperm using the same permute control vector is
36644 vr9 = 05000000 07000000 01000000 03000000
36646 That is, the leftmost 4 bytes of vr10 are interpreted as the
36647 source for the rightmost 4 bytes of vr9, and so on.
36649 If we change the permute control vector to
36651 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
36653 and issue
36655 vperm 9,11,10,12
36657 we get the desired
36659 vr9 = 00000006 00000004 00000002 00000000. */
36661 void
36662 altivec_expand_vec_perm_const_le (rtx operands[4])
36664 unsigned int i;
36665 rtx perm[16];
36666 rtx constv, unspec;
36667 rtx target = operands[0];
36668 rtx op0 = operands[1];
36669 rtx op1 = operands[2];
36670 rtx sel = operands[3];
36672 /* Unpack and adjust the constant selector. */
36673 for (i = 0; i < 16; ++i)
36675 rtx e = XVECEXP (sel, 0, i);
36676 unsigned int elt = 31 - (INTVAL (e) & 31);
36677 perm[i] = GEN_INT (elt);
36680 /* Expand to a permute, swapping the inputs and using the
36681 adjusted selector. */
36682 if (!REG_P (op0))
36683 op0 = force_reg (V16QImode, op0);
36684 if (!REG_P (op1))
36685 op1 = force_reg (V16QImode, op1);
36687 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
36688 constv = force_reg (V16QImode, constv);
36689 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
36690 UNSPEC_VPERM);
36691 if (!REG_P (target))
36693 rtx tmp = gen_reg_rtx (V16QImode);
36694 emit_move_insn (tmp, unspec);
36695 unspec = tmp;
36698 emit_move_insn (target, unspec);
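/* A byte-level model of the adjustment above (illustration only;
   vperm_le_byte is an invented simulator of the hardware's LE-mode
   behavior).  With the operands swapped and each selector element replaced
   by 31 minus itself, the LE view of the result picks exactly the bytes
   the unadjusted selector names.  */
#if 0 /* illustration, not compiled here */
#include <stdio.h>

/* LE element I of a vperm result: the hardware reads the control vector
   and the concatenated sources S1:S2 in BE element order.  */
static unsigned char
vperm_le_byte (const unsigned char *s1_le, const unsigned char *s2_le,
	       const unsigned char *ctl_le, int i)
{
  int m = ctl_le[i] & 31;	/* equals ctl_BE[15 - i] */
  return m < 16 ? s1_le[15 - m] : s2_le[31 - m];
}

int
main (void)
{
  unsigned char op0[16], op1[16], ctl[16];
  int i;
  for (i = 0; i < 16; i++)
    {
      op0[i] = i;		/* LE bytes 0..15 of the double-wide value */
      op1[i] = 16 + i;		/* LE bytes 16..31 */
      ctl[i] = 31 - 2 * i;	/* adjusted selector for bytes 0,2,...,30 */
    }
  for (i = 0; i < 16; i++)	/* operands swapped, as in the code above */
    printf ("%d ", vperm_le_byte (op1, op0, ctl, i));
  putchar ('\n');		/* 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 */
  return 0;
}
#endif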
36701 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
36702 permute control vector. But here it's not a constant, so we must
36703 generate a vector NAND or NOR to do the adjustment. */
36705 void
36706 altivec_expand_vec_perm_le (rtx operands[4])
36708 rtx notx, iorx, unspec;
36709 rtx target = operands[0];
36710 rtx op0 = operands[1];
36711 rtx op1 = operands[2];
36712 rtx sel = operands[3];
36713 rtx tmp = target;
36714 rtx norreg = gen_reg_rtx (V16QImode);
36715 machine_mode mode = GET_MODE (target);
36717 /* Get everything in regs so the pattern matches. */
36718 if (!REG_P (op0))
36719 op0 = force_reg (mode, op0);
36720 if (!REG_P (op1))
36721 op1 = force_reg (mode, op1);
36722 if (!REG_P (sel))
36723 sel = force_reg (V16QImode, sel);
36724 if (!REG_P (target))
36725 tmp = gen_reg_rtx (mode);
36727 if (TARGET_P9_VECTOR)
36729 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
36730 UNSPEC_VPERMR);
36732 else
36734 /* Invert the selector with a VNAND if available, else a VNOR.
36735 The VNAND is preferred for future fusion opportunities. */
36736 notx = gen_rtx_NOT (V16QImode, sel);
36737 iorx = (TARGET_P8_VECTOR
36738 ? gen_rtx_IOR (V16QImode, notx, notx)
36739 : gen_rtx_AND (V16QImode, notx, notx));
36740 emit_insn (gen_rtx_SET (norreg, iorx));
36742 /* Permute with operands reversed and adjusted selector. */
36743 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
36744 UNSPEC_VPERM);
36747 /* Copy into target, possibly by way of a register. */
36748 if (!REG_P (target))
36750 emit_move_insn (tmp, unspec);
36751 unspec = tmp;
36754 emit_move_insn (target, unspec);
36757 /* Expand an Altivec constant permutation. Return true if we match
36758 an efficient implementation; false to fall back to VPERM. */
36760 bool
36761 altivec_expand_vec_perm_const (rtx operands[4])
36763 struct altivec_perm_insn {
36764 HOST_WIDE_INT mask;
36765 enum insn_code impl;
36766 unsigned char perm[16];
36768 static const struct altivec_perm_insn patterns[] = {
36769 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
36770 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
36771 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
36772 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
36773 { OPTION_MASK_ALTIVEC,
36774 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
36775 : CODE_FOR_altivec_vmrglb_direct),
36776 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
36777 { OPTION_MASK_ALTIVEC,
36778 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
36779 : CODE_FOR_altivec_vmrglh_direct),
36780 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
36781 { OPTION_MASK_ALTIVEC,
36782 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
36783 : CODE_FOR_altivec_vmrglw_direct),
36784 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
36785 { OPTION_MASK_ALTIVEC,
36786 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
36787 : CODE_FOR_altivec_vmrghb_direct),
36788 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
36789 { OPTION_MASK_ALTIVEC,
36790 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
36791 : CODE_FOR_altivec_vmrghh_direct),
36792 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
36793 { OPTION_MASK_ALTIVEC,
36794 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
36795 : CODE_FOR_altivec_vmrghw_direct),
36796 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
36797 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
36798 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
36799 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
36800 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
36803 unsigned int i, j, elt, which;
36804 unsigned char perm[16];
36805 rtx target, op0, op1, sel, x;
36806 bool one_vec;
36808 target = operands[0];
36809 op0 = operands[1];
36810 op1 = operands[2];
36811 sel = operands[3];
36813 /* Unpack the constant selector. */
36814 for (i = which = 0; i < 16; ++i)
36816 rtx e = XVECEXP (sel, 0, i);
36817 elt = INTVAL (e) & 31;
36818 which |= (elt < 16 ? 1 : 2);
36819 perm[i] = elt;
36822 /* Simplify the constant selector based on operands. */
36823 switch (which)
36825 default:
36826 gcc_unreachable ();
36828 case 3:
36829 one_vec = false;
36830 if (!rtx_equal_p (op0, op1))
36831 break;
36832 /* FALLTHRU */
36834 case 2:
36835 for (i = 0; i < 16; ++i)
36836 perm[i] &= 15;
36837 op0 = op1;
36838 one_vec = true;
36839 break;
36841 case 1:
36842 op1 = op0;
36843 one_vec = true;
36844 break;
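/* For instance, a selector of { 16, 17, ..., 31 } references only the
   second operand (WHICH == 2), so it is rewritten here as
   { 0, 1, ..., 15 } applied to op1 alone, and the matching below need
   only consider the one-vector form.  */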
36847 /* Look for splat patterns. */
36848 if (one_vec)
36850 elt = perm[0];
36852 for (i = 0; i < 16; ++i)
36853 if (perm[i] != elt)
36854 break;
36855 if (i == 16)
36857 if (!BYTES_BIG_ENDIAN)
36858 elt = 15 - elt;
36859 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
36860 return true;
36863 if (elt % 2 == 0)
36865 for (i = 0; i < 16; i += 2)
36866 if (perm[i] != elt || perm[i + 1] != elt + 1)
36867 break;
36868 if (i == 16)
36870 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
36871 x = gen_reg_rtx (V8HImode);
36872 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
36873 GEN_INT (field)));
36874 emit_move_insn (target, gen_lowpart (V16QImode, x));
36875 return true;
36879 if (elt % 4 == 0)
36881 for (i = 0; i < 16; i += 4)
36882 if (perm[i] != elt
36883 || perm[i + 1] != elt + 1
36884 || perm[i + 2] != elt + 2
36885 || perm[i + 3] != elt + 3)
36886 break;
36887 if (i == 16)
36889 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
36890 x = gen_reg_rtx (V4SImode);
36891 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
36892 GEN_INT (field)));
36893 emit_move_insn (target, gen_lowpart (V16QImode, x));
36894 return true;
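/* Example: perm = { 4, 5, 6, 7, 4, 5, 6, 7, ... } repeats the byte
   indices of one aligned word (ELT == 4, word 1), so it becomes a
   vspltw of element 1 on big-endian, or element 3 - 1 == 2 in the
   right-to-left numbering used for little-endian.  */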
36899 /* Look for merge and pack patterns. */
36900 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
36902 bool swapped;
36904 if ((patterns[j].mask & rs6000_isa_flags) == 0)
36905 continue;
36907 elt = patterns[j].perm[0];
36908 if (perm[0] == elt)
36909 swapped = false;
36910 else if (perm[0] == elt + 16)
36911 swapped = true;
36912 else
36913 continue;
36914 for (i = 1; i < 16; ++i)
36916 elt = patterns[j].perm[i];
36917 if (swapped)
36918 elt = (elt >= 16 ? elt - 16 : elt + 16);
36919 else if (one_vec && elt >= 16)
36920 elt -= 16;
36921 if (perm[i] != elt)
36922 break;
36924 if (i == 16)
36926 enum insn_code icode = patterns[j].impl;
36927 machine_mode omode = insn_data[icode].operand[0].mode;
36928 machine_mode imode = insn_data[icode].operand[1].mode;
36930 /* For little-endian, don't use vpkuwum and vpkuhum if the
36931 underlying vector type is not V4SI and V8HI, respectively.
36932 For example, using vpkuwum with a V8HI picks up the even
36933 halfwords (BE numbering) when the even halfwords (LE
36934 numbering) are what we need. */
36935 if (!BYTES_BIG_ENDIAN
36936 && icode == CODE_FOR_altivec_vpkuwum_direct
36937 && ((GET_CODE (op0) == REG
36938 && GET_MODE (op0) != V4SImode)
36939 || (GET_CODE (op0) == SUBREG
36940 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
36941 continue;
36942 if (!BYTES_BIG_ENDIAN
36943 && icode == CODE_FOR_altivec_vpkuhum_direct
36944 && ((GET_CODE (op0) == REG
36945 && GET_MODE (op0) != V8HImode)
36946 || (GET_CODE (op0) == SUBREG
36947 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
36948 continue;
36950 /* For little-endian, the two input operands must be swapped
36951 (or swapped back) to ensure proper right-to-left numbering
36952 from 0 to 2N-1. */
36953 if (swapped ^ !BYTES_BIG_ENDIAN)
36954 std::swap (op0, op1);
36955 if (imode != V16QImode)
36957 op0 = gen_lowpart (imode, op0);
36958 op1 = gen_lowpart (imode, op1);
36960 if (omode == V16QImode)
36961 x = target;
36962 else
36963 x = gen_reg_rtx (omode);
36964 emit_insn (GEN_FCN (icode) (x, op0, op1));
36965 if (omode != V16QImode)
36966 emit_move_insn (target, gen_lowpart (V16QImode, x));
36967 return true;
36971 if (!BYTES_BIG_ENDIAN)
36973 altivec_expand_vec_perm_const_le (operands);
36974 return true;
36977 return false;
36980 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
36981 Return true if we match an efficient implementation. */
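/* E.g. PERM0 == 1 and PERM1 == 2 select element 1 of OP0 followed by
   element 0 of OP1; the (vec_select (vec_concat ...)) form built below
   is what the VSX xxpermdi and the paired-single/SPE merge patterns
   recognize.  */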
36983 static bool
36984 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
36985 unsigned char perm0, unsigned char perm1)
36987 rtx x;
36989 /* If both selectors come from the same operand, fold to single op. */
36990 if ((perm0 & 2) == (perm1 & 2))
36992 if (perm0 & 2)
36993 op0 = op1;
36994 else
36995 op1 = op0;
36997 /* If both operands are equal, fold to simpler permutation. */
36998 if (rtx_equal_p (op0, op1))
37000 perm0 = perm0 & 1;
37001 perm1 = (perm1 & 1) + 2;
37003 /* If the first selector comes from the second operand, swap. */
37004 else if (perm0 & 2)
37006 if (perm1 & 2)
37007 return false;
37008 perm0 -= 2;
37009 perm1 += 2;
37010 std::swap (op0, op1);
37012 /* If the second selector does not come from the second operand, fail. */
37013 else if ((perm1 & 2) == 0)
37014 return false;
37016 /* Success! */
37017 if (target != NULL)
37019 machine_mode vmode, dmode;
37020 rtvec v;
37022 vmode = GET_MODE (target);
37023 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
37024 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
37025 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
37026 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
37027 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
37028 emit_insn (gen_rtx_SET (target, x));
37030 return true;
37033 bool
37034 rs6000_expand_vec_perm_const (rtx operands[4])
37036 rtx target, op0, op1, sel;
37037 unsigned char perm0, perm1;
37039 target = operands[0];
37040 op0 = operands[1];
37041 op1 = operands[2];
37042 sel = operands[3];
37044 /* Unpack the constant selector. */
37045 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
37046 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
37048 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
37051 /* Test whether a constant permutation is supported. */
37053 static bool
37054 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
37055 const unsigned char *sel)
37057 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
37058 if (TARGET_ALTIVEC)
37059 return true;
37061 /* Check for ps_merge* or evmerge* insns. */
37062 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
37063 || (TARGET_SPE && vmode == V2SImode))
37065 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
37066 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
37067 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
37070 return false;
37073 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
37075 static void
37076 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
37077 machine_mode vmode, unsigned nelt, rtx perm[])
37079 machine_mode imode;
37080 rtx x;
37082 imode = vmode;
37083 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
37085 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
37086 imode = mode_for_vector (imode, nelt);
37089 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
37090 x = expand_vec_perm (vmode, op0, op1, x, target);
37091 if (x != target)
37092 emit_move_insn (target, x);
37095 /* Expand an extract even operation. */
37097 void
37098 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
37100 machine_mode vmode = GET_MODE (target);
37101 unsigned i, nelt = GET_MODE_NUNITS (vmode);
37102 rtx perm[16];
37104 for (i = 0; i < nelt; i++)
37105 perm[i] = GEN_INT (i * 2);
37107 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
37110 /* Expand a vector interleave operation. */
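/* E.g. for V4SI with HIGHP true this builds the selector { 0, 4, 1, 5 },
   pairing element 0 of OP0 with element 0 of OP1 and so on, i.e. a
   vmrghw-style merge in big-endian element numbering.  */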
37112 void
37113 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
37115 machine_mode vmode = GET_MODE (target);
37116 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
37117 rtx perm[16];
37119 high = (highp ? 0 : nelt / 2);
37120 for (i = 0; i < nelt / 2; i++)
37122 perm[i * 2] = GEN_INT (i + high);
37123 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
37126 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
37129 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
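/* E.g. SCALE == 3 splats 2.0**3 == 8.0 into a scratch vector and emits
   one vector multiply, scaling both doubles in SRC by 8.0.  */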
37130 void
37131 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
37133 HOST_WIDE_INT hwi_scale (scale);
37134 REAL_VALUE_TYPE r_pow;
37135 rtvec v = rtvec_alloc (2);
37136 rtx elt;
37137 rtx scale_vec = gen_reg_rtx (V2DFmode);
37138 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
37139 elt = const_double_from_real_value (r_pow, DFmode);
37140 RTVEC_ELT (v, 0) = elt;
37141 RTVEC_ELT (v, 1) = elt;
37142 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
37143 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
37146 /* Return an RTX representing where to find the function value of a
37147 function returning MODE. */
37148 static rtx
37149 rs6000_complex_function_value (machine_mode mode)
37151 unsigned int regno;
37152 rtx r1, r2;
37153 machine_mode inner = GET_MODE_INNER (mode);
37154 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
37156 if (TARGET_FLOAT128_TYPE
37157 && (mode == KCmode
37158 || (mode == TCmode && TARGET_IEEEQUAD)))
37159 regno = ALTIVEC_ARG_RETURN;
37161 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37162 regno = FP_ARG_RETURN;
37164 else
37166 regno = GP_ARG_RETURN;
37168 /* 32-bit is OK since it'll go in r3/r4. */
37169 if (TARGET_32BIT && inner_bytes >= 4)
37170 return gen_rtx_REG (mode, regno);
37173 if (inner_bytes >= 8)
37174 return gen_rtx_REG (mode, regno);
37176 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
37177 const0_rtx);
37178 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
37179 GEN_INT (inner_bytes));
37180 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
37183 /* Return an rtx describing a return value of MODE as a PARALLEL
37184 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
37185 stride REG_STRIDE. */
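/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)
   describes a 64-bit value split between r3 (byte offset 0) and r4
   (byte offset 4), as used for -m32 -mpowerpc64 below.  */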
37187 static rtx
37188 rs6000_parallel_return (machine_mode mode,
37189 int n_elts, machine_mode elt_mode,
37190 unsigned int regno, unsigned int reg_stride)
37192 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37194 int i;
37195 for (i = 0; i < n_elts; i++)
37197 rtx r = gen_rtx_REG (elt_mode, regno);
37198 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
37199 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
37200 regno += reg_stride;
37203 return par;
37206 /* Target hook for TARGET_FUNCTION_VALUE.
37208 On the SPE, both FPs and vectors are returned in r3.
37210 On RS/6000 an integer value is in r3 and a floating-point value is in
37211 fp1, unless -msoft-float. */
37213 static rtx
37214 rs6000_function_value (const_tree valtype,
37215 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
37216 bool outgoing ATTRIBUTE_UNUSED)
37218 machine_mode mode;
37219 unsigned int regno;
37220 machine_mode elt_mode;
37221 int n_elts;
37223 /* Special handling for structs in darwin64. */
37224 if (TARGET_MACHO
37225 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
37227 CUMULATIVE_ARGS valcum;
37228 rtx valret;
37230 valcum.words = 0;
37231 valcum.fregno = FP_ARG_MIN_REG;
37232 valcum.vregno = ALTIVEC_ARG_MIN_REG;
37233 /* Do a trial code generation as if this were going to be passed as
37234 an argument; if any part goes in memory, we return NULL. */
37235 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
37236 if (valret)
37237 return valret;
37238 /* Otherwise fall through to standard ABI rules. */
37241 mode = TYPE_MODE (valtype);
37243 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
37244 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
37246 int first_reg, n_regs;
37248 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
37250 /* _Decimal128 must use even/odd register pairs. */
37251 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37252 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
37254 else
37256 first_reg = ALTIVEC_ARG_RETURN;
37257 n_regs = 1;
37260 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
37263 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
37264 if (TARGET_32BIT && TARGET_POWERPC64)
37265 switch (mode)
37267 default:
37268 break;
37269 case DImode:
37270 case SCmode:
37271 case DCmode:
37272 case TCmode:
37273 int count = GET_MODE_SIZE (mode) / 4;
37274 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
37277 if ((INTEGRAL_TYPE_P (valtype)
37278 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
37279 || POINTER_TYPE_P (valtype))
37280 mode = TARGET_32BIT ? SImode : DImode;
37282 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37283 /* _Decimal128 must use an even/odd register pair. */
37284 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37285 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
37286 && !FLOAT128_VECTOR_P (mode)
37287 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
37288 regno = FP_ARG_RETURN;
37289 else if (TREE_CODE (valtype) == COMPLEX_TYPE
37290 && targetm.calls.split_complex_arg)
37291 return rs6000_complex_function_value (mode);
37292 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37293 return register is used in both cases, and we won't see V2DImode/V2DFmode
37294 for pure altivec, combine the two cases. */
37295 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
37296 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
37297 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37298 regno = ALTIVEC_ARG_RETURN;
37299 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37300 && (mode == DFmode || mode == DCmode
37301 || FLOAT128_IBM_P (mode) || mode == TCmode))
37302 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37303 else
37304 regno = GP_ARG_RETURN;
37306 return gen_rtx_REG (mode, regno);
37309 /* Define how to find the value returned by a library function
37310 assuming the value has mode MODE. */
37311 rtx
37312 rs6000_libcall_value (machine_mode mode)
37314 unsigned int regno;
37316 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
37317 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
37318 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
37320 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37321 /* _Decimal128 must use an even/odd register pair. */
37322 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37323 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
37324 && TARGET_HARD_FLOAT && TARGET_FPRS
37325 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
37326 regno = FP_ARG_RETURN;
37327 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37328 return register is used in both cases, and we won't see V2DImode/V2DFmode
37329 for pure altivec, combine the two cases. */
37330 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
37331 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
37332 regno = ALTIVEC_ARG_RETURN;
37333 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
37334 return rs6000_complex_function_value (mode);
37335 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37336 && (mode == DFmode || mode == DCmode
37337 || FLOAT128_IBM_P (mode) || mode == TCmode))
37338 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37339 else
37340 regno = GP_ARG_RETURN;
37342 return gen_rtx_REG (mode, regno);
37346 /* Return true if we use LRA instead of the reload pass. */
37347 static bool
37348 rs6000_lra_p (void)
37350 return TARGET_LRA;
37353 /* Given FROM and TO register numbers, say whether this elimination is allowed.
37354 Frame pointer elimination is automatically handled.
37356 For the RS/6000, if frame pointer elimination is being done, we would like
37357 to convert ap into fp, not sp.
37359 We need r30 if -mminimal-toc was specified and there are constant pool
37360 references. */
37362 static bool
37363 rs6000_can_eliminate (const int from, const int to)
37365 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
37366 ? ! frame_pointer_needed
37367 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
37368 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
37369 : true);
37372 /* Define the offset between two registers, FROM to be eliminated and its
37373 replacement TO, at the start of a routine. */
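/* E.g. for a hypothetical frame with info->total_size == 144 that has
   been pushed, eliminating the argument pointer to the stack pointer
   yields an offset of +144, since the incoming arguments sit above the
   newly allocated frame.  */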
37374 HOST_WIDE_INT
37375 rs6000_initial_elimination_offset (int from, int to)
37377 rs6000_stack_t *info = rs6000_stack_info ();
37378 HOST_WIDE_INT offset;
37380 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37381 offset = info->push_p ? 0 : -info->total_size;
37382 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37384 offset = info->push_p ? 0 : -info->total_size;
37385 if (FRAME_GROWS_DOWNWARD)
37386 offset += info->fixed_size + info->vars_size + info->parm_size;
37388 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37389 offset = FRAME_GROWS_DOWNWARD
37390 ? info->fixed_size + info->vars_size + info->parm_size
37391 : 0;
37392 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37393 offset = info->total_size;
37394 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37395 offset = info->push_p ? info->total_size : 0;
37396 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
37397 offset = 0;
37398 else
37399 gcc_unreachable ();
37401 return offset;
37404 static rtx
37405 rs6000_dwarf_register_span (rtx reg)
37407 rtx parts[8];
37408 int i, words;
37409 unsigned regno = REGNO (reg);
37410 machine_mode mode = GET_MODE (reg);
37412 if (TARGET_SPE
37413 && regno < 32
37414 && (SPE_VECTOR_MODE (GET_MODE (reg))
37415 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
37416 && mode != SFmode && mode != SDmode && mode != SCmode)))
37417 ;
37418 else
37419 return NULL_RTX;
37421 regno = REGNO (reg);
37423 /* The duality of the SPE register size wreaks all kinds of havoc.
37424 This is a way of distinguishing r0 in 32-bits from r0 in
37425 64-bits. */
37426 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
37427 gcc_assert (words <= 4);
37428 for (i = 0; i < words; i++, regno++)
37430 if (BYTES_BIG_ENDIAN)
37432 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37433 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
37435 else
37437 parts[2 * i] = gen_rtx_REG (SImode, regno);
37438 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37442 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
37445 /* Fill in sizes for SPE register high parts in table used by unwinder. */
37447 static void
37448 rs6000_init_dwarf_reg_sizes_extra (tree address)
37450 if (TARGET_SPE)
37452 int i;
37453 machine_mode mode = TYPE_MODE (char_type_node);
37454 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37455 rtx mem = gen_rtx_MEM (BLKmode, addr);
37456 rtx value = gen_int_mode (4, mode);
37458 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
37460 int column = DWARF_REG_TO_UNWIND_COLUMN
37461 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37462 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37464 emit_move_insn (adjust_address (mem, mode, offset), value);
37468 if (TARGET_MACHO && ! TARGET_ALTIVEC)
37470 int i;
37471 machine_mode mode = TYPE_MODE (char_type_node);
37472 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37473 rtx mem = gen_rtx_MEM (BLKmode, addr);
37474 rtx value = gen_int_mode (16, mode);
37476 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
37477 The unwinder still needs to know the size of Altivec registers. */
37479 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
37481 int column = DWARF_REG_TO_UNWIND_COLUMN
37482 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37483 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37485 emit_move_insn (adjust_address (mem, mode, offset), value);
37490 /* Map internal gcc register numbers to debug format register numbers.
37491 FORMAT specifies the type of debug register number to use:
37492 0 -- debug information, except for frame-related sections
37493 1 -- DWARF .debug_frame section
37494 2 -- DWARF .eh_frame section */
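/* E.g. under RS6000_USE_DWARF_NUMBERING, r3 and f1 keep their internal
   numbers (3 and 33), LR maps to 108, and CR2 maps to 88 in
   .debug_info but to 64 (the whole CR) in .debug_frame, as explained
   below.  */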
37496 unsigned int
37497 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
37499 /* We never use the GCC internal number for SPE high registers.
37500 Those are mapped to the 1200..1231 range for all debug formats. */
37501 if (SPE_HIGH_REGNO_P (regno))
37502 return regno - FIRST_SPE_HIGH_REGNO + 1200;
37504 /* Except for the above, we use the internal number for non-DWARF
37505 debug information, and also for .eh_frame. */
37506 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
37507 return regno;
37509 /* On some platforms, we use the standard DWARF register
37510 numbering for .debug_info and .debug_frame. */
37511 #ifdef RS6000_USE_DWARF_NUMBERING
37512 if (regno <= 63)
37513 return regno;
37514 if (regno == LR_REGNO)
37515 return 108;
37516 if (regno == CTR_REGNO)
37517 return 109;
37518 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
37519 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
37520 The actual code emitted saves the whole of CR, so we map CR2_REGNO
37521 to the DWARF reg for CR. */
37522 if (format == 1 && regno == CR2_REGNO)
37523 return 64;
37524 if (CR_REGNO_P (regno))
37525 return regno - CR0_REGNO + 86;
37526 if (regno == CA_REGNO)
37527 return 101; /* XER */
37528 if (ALTIVEC_REGNO_P (regno))
37529 return regno - FIRST_ALTIVEC_REGNO + 1124;
37530 if (regno == VRSAVE_REGNO)
37531 return 356;
37532 if (regno == VSCR_REGNO)
37533 return 67;
37534 if (regno == SPE_ACC_REGNO)
37535 return 99;
37536 if (regno == SPEFSCR_REGNO)
37537 return 612;
37538 #endif
37539 return regno;
37542 /* target hook eh_return_filter_mode */
37543 static machine_mode
37544 rs6000_eh_return_filter_mode (void)
37546 return TARGET_32BIT ? SImode : word_mode;
37549 /* Target hook for scalar_mode_supported_p. */
37550 static bool
37551 rs6000_scalar_mode_supported_p (machine_mode mode)
37553 /* -m32 does not support TImode. This is the default, from
37554 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
37555 same ABI as for -m32. But default_scalar_mode_supported_p allows
37556 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
37557 for -mpowerpc64. */
37558 if (TARGET_32BIT && mode == TImode)
37559 return false;
37561 if (DECIMAL_FLOAT_MODE_P (mode))
37562 return default_decimal_float_supported_p ();
37563 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
37564 return true;
37565 else
37566 return default_scalar_mode_supported_p (mode);
37569 /* Target hook for vector_mode_supported_p. */
37570 static bool
37571 rs6000_vector_mode_supported_p (machine_mode mode)
37574 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
37575 return true;
37577 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
37578 return true;
37580 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
37581 128-bit, the compiler might try to widen IEEE 128-bit to IBM
37582 double-double. */
37583 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
37584 return true;
37586 else
37587 return false;
37590 /* Target hook for floatn_mode. */
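/* E.g. when long double is IBM extended, _Float128 and _Float64x both
   map to KFmode if -mfloat128 is enabled, and to VOIDmode (so the type
   is not created) otherwise; _Float32x is always DFmode.  */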
37591 static machine_mode
37592 rs6000_floatn_mode (int n, bool extended)
37594 if (extended)
37596 switch (n)
37598 case 32:
37599 return DFmode;
37601 case 64:
37602 if (TARGET_FLOAT128_KEYWORD)
37603 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37604 else
37605 return VOIDmode;
37607 case 128:
37608 return VOIDmode;
37610 default:
37611 /* Those are the only valid _FloatNx types. */
37612 gcc_unreachable ();
37615 else
37617 switch (n)
37619 case 32:
37620 return SFmode;
37622 case 64:
37623 return DFmode;
37625 case 128:
37626 if (TARGET_FLOAT128_KEYWORD)
37627 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37628 else
37629 return VOIDmode;
37631 default:
37632 return VOIDmode;
37638 /* Target hook for c_mode_for_suffix. */
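/* E.g. with -mfloat128, a literal such as 1.0q or 1.0Q is given the
   IEEE 128-bit mode (KFmode, or TFmode when long double is IEEE).  */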
37639 static machine_mode
37640 rs6000_c_mode_for_suffix (char suffix)
37642 if (TARGET_FLOAT128_TYPE)
37644 if (suffix == 'q' || suffix == 'Q')
37645 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37647 /* At the moment, we are not defining a suffix for IBM extended double.
37648 If/when the default for -mabi=ieeelongdouble is changed, and we want
37649 to support __ibm128 constants in legacy library code, we may need to
37650 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
37651 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
37652 __float80 constants. */
37655 return VOIDmode;
37658 /* Target hook for invalid_arg_for_unprototyped_fn. */
37659 static const char *
37660 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
37662 return (!rs6000_darwin64_abi
37663 && typelist == 0
37664 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
37665 && (funcdecl == NULL_TREE
37666 || (TREE_CODE (funcdecl) == FUNCTION_DECL
37667 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
37668 ? N_("AltiVec argument passed to unprototyped function")
37669 : NULL;
37672 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
37673 setup by using __stack_chk_fail_local hidden function instead of
37674 calling __stack_chk_fail directly. Otherwise it is better to call
37675 __stack_chk_fail directly. */
37677 static tree ATTRIBUTE_UNUSED
37678 rs6000_stack_protect_fail (void)
37680 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
37681 ? default_hidden_stack_protect_fail ()
37682 : default_external_stack_protect_fail ();
37685 void
37686 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
37687 int num_operands ATTRIBUTE_UNUSED)
37689 if (rs6000_warn_cell_microcode)
37691 const char *temp;
37692 int insn_code_number = recog_memoized (insn);
37693 location_t location = INSN_LOCATION (insn);
37695 /* Punt on insns we cannot recognize. */
37696 if (insn_code_number < 0)
37697 return;
37699 temp = get_insn_template (insn_code_number, insn);
37701 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
37702 warning_at (location, OPT_mwarn_cell_microcode,
37703 "emitting microcode insn %s\t[%s] #%d",
37704 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37705 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
37706 warning_at (location, OPT_mwarn_cell_microcode,
37707 "emitting conditional microcode insn %s\t[%s] #%d",
37708 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37712 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
37714 #if TARGET_ELF
37715 static unsigned HOST_WIDE_INT
37716 rs6000_asan_shadow_offset (void)
37718 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
37720 #endif
37722 /* Mask options that we want to support inside of attribute((target)) and
37723 #pragma GCC target operations. Note, we do not include things like
37724 64/32-bit, endianness, hard/soft floating point, etc. that would have
37725 different calling sequences. */
37727 struct rs6000_opt_mask {
37728 const char *name; /* option name */
37729 HOST_WIDE_INT mask; /* mask to set */
37730 bool invert; /* invert sense of mask */
37731 bool valid_target; /* option is a target option */
37734 static struct rs6000_opt_mask const rs6000_opt_masks[] =
37736 { "altivec", OPTION_MASK_ALTIVEC, false, true },
37737 { "cmpb", OPTION_MASK_CMPB, false, true },
37738 { "crypto", OPTION_MASK_CRYPTO, false, true },
37739 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
37740 { "dlmzb", OPTION_MASK_DLMZB, false, true },
37741 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
37742 false, true },
37743 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
37744 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
37745 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
37746 { "fprnd", OPTION_MASK_FPRND, false, true },
37747 { "hard-dfp", OPTION_MASK_DFP, false, true },
37748 { "htm", OPTION_MASK_HTM, false, true },
37749 { "isel", OPTION_MASK_ISEL, false, true },
37750 { "mfcrf", OPTION_MASK_MFCRF, false, true },
37751 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
37752 { "modulo", OPTION_MASK_MODULO, false, true },
37753 { "mulhw", OPTION_MASK_MULHW, false, true },
37754 { "multiple", OPTION_MASK_MULTIPLE, false, true },
37755 { "popcntb", OPTION_MASK_POPCNTB, false, true },
37756 { "popcntd", OPTION_MASK_POPCNTD, false, true },
37757 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
37758 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
37759 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
37760 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
37761 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
37762 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
37763 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
37764 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
37765 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
37766 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
37767 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
37768 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
37769 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
37770 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
37771 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
37772 { "string", OPTION_MASK_STRING, false, true },
37773 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
37774 { "update", OPTION_MASK_NO_UPDATE, true , true },
37775 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
37776 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
37777 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
37778 { "vsx", OPTION_MASK_VSX, false, true },
37779 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
37780 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
37781 #ifdef OPTION_MASK_64BIT
37782 #if TARGET_AIX_OS
37783 { "aix64", OPTION_MASK_64BIT, false, false },
37784 { "aix32", OPTION_MASK_64BIT, true, false },
37785 #else
37786 { "64", OPTION_MASK_64BIT, false, false },
37787 { "32", OPTION_MASK_64BIT, true, false },
37788 #endif
37789 #endif
37790 #ifdef OPTION_MASK_EABI
37791 { "eabi", OPTION_MASK_EABI, false, false },
37792 #endif
37793 #ifdef OPTION_MASK_LITTLE_ENDIAN
37794 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
37795 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
37796 #endif
37797 #ifdef OPTION_MASK_RELOCATABLE
37798 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
37799 #endif
37800 #ifdef OPTION_MASK_STRICT_ALIGN
37801 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
37802 #endif
37803 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
37804 { "string", OPTION_MASK_STRING, false, false },
37807 /* Builtin mask mapping for printing the flags. */
37808 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
37810 { "altivec", RS6000_BTM_ALTIVEC, false, false },
37811 { "vsx", RS6000_BTM_VSX, false, false },
37812 { "spe", RS6000_BTM_SPE, false, false },
37813 { "paired", RS6000_BTM_PAIRED, false, false },
37814 { "fre", RS6000_BTM_FRE, false, false },
37815 { "fres", RS6000_BTM_FRES, false, false },
37816 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
37817 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
37818 { "popcntd", RS6000_BTM_POPCNTD, false, false },
37819 { "cell", RS6000_BTM_CELL, false, false },
37820 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
37821 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
37822 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
37823 { "crypto", RS6000_BTM_CRYPTO, false, false },
37824 { "htm", RS6000_BTM_HTM, false, false },
37825 { "hard-dfp", RS6000_BTM_DFP, false, false },
37826 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
37827 { "long-double-128", RS6000_BTM_LDBL128, false, false },
37828 { "float128", RS6000_BTM_FLOAT128, false, false },
37831 /* Option variables that we want to support inside attribute((target)) and
37832 #pragma GCC target operations. */
37834 struct rs6000_opt_var {
37835 const char *name; /* option name */
37836 size_t global_offset; /* offset of the option in global_options. */
37837 size_t target_offset; /* offset of the option in target options. */
37840 static struct rs6000_opt_var const rs6000_opt_vars[] =
37842 { "friz",
37843 offsetof (struct gcc_options, x_TARGET_FRIZ),
37844 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
37845 { "avoid-indexed-addresses",
37846 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
37847 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
37848 { "paired",
37849 offsetof (struct gcc_options, x_rs6000_paired_float),
37850 offsetof (struct cl_target_option, x_rs6000_paired_float), },
37851 { "longcall",
37852 offsetof (struct gcc_options, x_rs6000_default_long_calls),
37853 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
37854 { "optimize-swaps",
37855 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
37856 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
37857 { "allow-movmisalign",
37858 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
37859 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
37860 { "allow-df-permute",
37861 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
37862 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
37863 { "sched-groups",
37864 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
37865 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
37866 { "always-hint",
37867 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
37868 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
37869 { "align-branch-targets",
37870 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
37871 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
37872 { "vectorize-builtins",
37873 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
37874 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
37875 { "tls-markers",
37876 offsetof (struct gcc_options, x_tls_markers),
37877 offsetof (struct cl_target_option, x_tls_markers), },
37878 { "sched-prolog",
37879 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37880 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37881 { "sched-epilog",
37882 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37883 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37884 { "gen-cell-microcode",
37885 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
37886 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
37887 { "warn-cell-microcode",
37888 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
37889 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
37892 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
37893 parsing. Return true if there were no errors. */
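/* E.g. __attribute__((target("no-vsx,cpu=power7"))) is split at the
   commas; "no-vsx" clears OPTION_MASK_VSX via rs6000_opt_masks, while
   "cpu=power7" sets rs6000_cpu_index.  An unrecognized token is
   reported as invalid, quoted with the attribute or pragma spelling it
   came from.  */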
37895 static bool
37896 rs6000_inner_target_options (tree args, bool attr_p)
37898 bool ret = true;
37900 if (args == NULL_TREE)
37901 ;
37903 else if (TREE_CODE (args) == STRING_CST)
37905 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37906 char *q;
37908 while ((q = strtok (p, ",")) != NULL)
37910 bool error_p = false;
37911 bool not_valid_p = false;
37912 const char *cpu_opt = NULL;
37914 p = NULL;
37915 if (strncmp (q, "cpu=", 4) == 0)
37917 int cpu_index = rs6000_cpu_name_lookup (q+4);
37918 if (cpu_index >= 0)
37919 rs6000_cpu_index = cpu_index;
37920 else
37922 error_p = true;
37923 cpu_opt = q+4;
37926 else if (strncmp (q, "tune=", 5) == 0)
37928 int tune_index = rs6000_cpu_name_lookup (q+5);
37929 if (tune_index >= 0)
37930 rs6000_tune_index = tune_index;
37931 else
37933 error_p = true;
37934 cpu_opt = q+5;
37937 else
37939 size_t i;
37940 bool invert = false;
37941 char *r = q;
37943 error_p = true;
37944 if (strncmp (r, "no-", 3) == 0)
37946 invert = true;
37947 r += 3;
37950 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
37951 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
37953 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
37955 if (!rs6000_opt_masks[i].valid_target)
37956 not_valid_p = true;
37957 else
37959 error_p = false;
37960 rs6000_isa_flags_explicit |= mask;
37962 /* VSX needs altivec, so -mvsx automagically sets
37963 altivec and disables -mavoid-indexed-addresses. */
37964 if (!invert)
37966 if (mask == OPTION_MASK_VSX)
37968 mask |= OPTION_MASK_ALTIVEC;
37969 TARGET_AVOID_XFORM = 0;
37973 if (rs6000_opt_masks[i].invert)
37974 invert = !invert;
37976 if (invert)
37977 rs6000_isa_flags &= ~mask;
37978 else
37979 rs6000_isa_flags |= mask;
37981 break;
37984 if (error_p && !not_valid_p)
37986 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
37987 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
37989 size_t j = rs6000_opt_vars[i].global_offset;
37990 *((int *) ((char *)&global_options + j)) = !invert;
37991 error_p = false;
37992 not_valid_p = false;
37993 break;
37998 if (error_p)
38000 const char *eprefix, *esuffix;
38002 ret = false;
38003 if (attr_p)
38005 eprefix = "__attribute__((__target__(";
38006 esuffix = ")))";
38008 else
38010 eprefix = "#pragma GCC target ";
38011 esuffix = "";
38014 if (cpu_opt)
38015 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
38016 q, esuffix);
38017 else if (not_valid_p)
38018 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
38019 else
38020 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
38025 else if (TREE_CODE (args) == TREE_LIST)
38029 tree value = TREE_VALUE (args);
38030 if (value)
38032 bool ret2 = rs6000_inner_target_options (value, attr_p);
38033 if (!ret2)
38034 ret = false;
38036 args = TREE_CHAIN (args);
38038 while (args != NULL_TREE);
38041 else
38042 gcc_unreachable ();
38044 return ret;
38047 /* Print out the target options as a list for -mdebug=target. */
38049 static void
38050 rs6000_debug_target_options (tree args, const char *prefix)
38052 if (args == NULL_TREE)
38053 fprintf (stderr, "%s<NULL>", prefix);
38055 else if (TREE_CODE (args) == STRING_CST)
38057 char *p = ASTRDUP (TREE_STRING_POINTER (args));
38058 char *q;
38060 while ((q = strtok (p, ",")) != NULL)
38062 p = NULL;
38063 fprintf (stderr, "%s\"%s\"", prefix, q);
38064 prefix = ", ";
38068 else if (TREE_CODE (args) == TREE_LIST)
38072 tree value = TREE_VALUE (args);
38073 if (value)
38075 rs6000_debug_target_options (value, prefix);
38076 prefix = ", ";
38078 args = TREE_CHAIN (args);
38080 while (args != NULL_TREE);
38083 else
38084 gcc_unreachable ();
38086 return;
38090 /* Hook to validate attribute((target("..."))). */
38092 static bool
38093 rs6000_valid_attribute_p (tree fndecl,
38094 tree ARG_UNUSED (name),
38095 tree args,
38096 int flags)
38098 struct cl_target_option cur_target;
38099 bool ret;
38100 tree old_optimize = build_optimization_node (&global_options);
38101 tree new_target, new_optimize;
38102 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
38104 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
38106 if (TARGET_DEBUG_TARGET)
38108 tree tname = DECL_NAME (fndecl);
38109 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
38110 if (tname)
38111 fprintf (stderr, "function: %.*s\n",
38112 (int) IDENTIFIER_LENGTH (tname),
38113 IDENTIFIER_POINTER (tname));
38114 else
38115 fprintf (stderr, "function: unknown\n");
38117 fprintf (stderr, "args:");
38118 rs6000_debug_target_options (args, " ");
38119 fprintf (stderr, "\n");
38121 if (flags)
38122 fprintf (stderr, "flags: 0x%x\n", flags);
38124 fprintf (stderr, "--------------------\n");
38127 old_optimize = build_optimization_node (&global_options);
38128 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
38130 /* If the function changed the optimization levels as well as setting target
38131 options, start with the optimizations specified. */
38132 if (func_optimize && func_optimize != old_optimize)
38133 cl_optimization_restore (&global_options,
38134 TREE_OPTIMIZATION (func_optimize));
38136 /* The target attributes may also change some optimization flags, so update
38137 the optimization options if necessary. */
38138 cl_target_option_save (&cur_target, &global_options);
38139 rs6000_cpu_index = rs6000_tune_index = -1;
38140 ret = rs6000_inner_target_options (args, true);
38142 /* Set up any additional state. */
38143 if (ret)
38145 ret = rs6000_option_override_internal (false);
38146 new_target = build_target_option_node (&global_options);
38148 else
38149 new_target = NULL;
38151 new_optimize = build_optimization_node (&global_options);
38153 if (!new_target)
38154 ret = false;
38156 else if (fndecl)
38158 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
38160 if (old_optimize != new_optimize)
38161 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
38164 cl_target_option_restore (&global_options, &cur_target);
38166 if (old_optimize != new_optimize)
38167 cl_optimization_restore (&global_options,
38168 TREE_OPTIMIZATION (old_optimize));
38170 return ret;
38174 /* Hook to validate the current #pragma GCC target and set the state, and
38175 update the macros based on what was changed. If ARGS is NULL, then
38176 POP_TARGET is used to reset the options. */
38178 bool
38179 rs6000_pragma_target_parse (tree args, tree pop_target)
38181 tree prev_tree = build_target_option_node (&global_options);
38182 tree cur_tree;
38183 struct cl_target_option *prev_opt, *cur_opt;
38184 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
38185 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
38187 if (TARGET_DEBUG_TARGET)
38189 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
38190 fprintf (stderr, "args:");
38191 rs6000_debug_target_options (args, " ");
38192 fprintf (stderr, "\n");
38194 if (pop_target)
38196 fprintf (stderr, "pop_target:\n");
38197 debug_tree (pop_target);
38199 else
38200 fprintf (stderr, "pop_target: <NULL>\n");
38202 fprintf (stderr, "--------------------\n");
38205 if (! args)
38207 cur_tree = ((pop_target)
38208 ? pop_target
38209 : target_option_default_node);
38210 cl_target_option_restore (&global_options,
38211 TREE_TARGET_OPTION (cur_tree));
38213 else
38215 rs6000_cpu_index = rs6000_tune_index = -1;
38216 if (!rs6000_inner_target_options (args, false)
38217 || !rs6000_option_override_internal (false)
38218 || (cur_tree = build_target_option_node (&global_options))
38219 == NULL_TREE)
38221 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
38222 fprintf (stderr, "invalid pragma\n");
38224 return false;
38228 target_option_current_node = cur_tree;
38230 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
38231 change the macros that are defined. */
38232 if (rs6000_target_modify_macros_ptr)
38234 prev_opt = TREE_TARGET_OPTION (prev_tree);
38235 prev_bumask = prev_opt->x_rs6000_builtin_mask;
38236 prev_flags = prev_opt->x_rs6000_isa_flags;
38238 cur_opt = TREE_TARGET_OPTION (cur_tree);
38239 cur_flags = cur_opt->x_rs6000_isa_flags;
38240 cur_bumask = cur_opt->x_rs6000_builtin_mask;
38242 diff_bumask = (prev_bumask ^ cur_bumask);
38243 diff_flags = (prev_flags ^ cur_flags);
38245 if ((diff_flags != 0) || (diff_bumask != 0))
38247 /* Delete old macros. */
38248 rs6000_target_modify_macros_ptr (false,
38249 prev_flags & diff_flags,
38250 prev_bumask & diff_bumask);
38252 /* Define new macros. */
38253 rs6000_target_modify_macros_ptr (true,
38254 cur_flags & diff_flags,
38255 cur_bumask & diff_bumask);
38259 return true;
38263 /* Remember the last target of rs6000_set_current_function. */
38264 static GTY(()) tree rs6000_previous_fndecl;
38266 /* Establish appropriate back-end context for processing the function
38267 FNDECL. The argument might be NULL to indicate processing at top
38268 level, outside of any function scope. */
38269 static void
38270 rs6000_set_current_function (tree fndecl)
38272 tree old_tree = (rs6000_previous_fndecl
38273 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
38274 : NULL_TREE);
38276 tree new_tree = (fndecl
38277 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
38278 : NULL_TREE);
38280 if (TARGET_DEBUG_TARGET)
38282 bool print_final = false;
38283 fprintf (stderr, "\n==================== rs6000_set_current_function");
38285 if (fndecl)
38286 fprintf (stderr, ", fndecl %s (%p)",
38287 (DECL_NAME (fndecl)
38288 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
38289 : "<unknown>"), (void *)fndecl);
38291 if (rs6000_previous_fndecl)
38292 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
38294 fprintf (stderr, "\n");
38295 if (new_tree)
38297 fprintf (stderr, "\nnew fndecl target specific options:\n");
38298 debug_tree (new_tree);
38299 print_final = true;
38302 if (old_tree)
38304 fprintf (stderr, "\nold fndecl target specific options:\n");
38305 debug_tree (old_tree);
38306 print_final = true;
38309 if (print_final)
38310 fprintf (stderr, "--------------------\n");
38313 /* Only change the context if the function changes. This hook is called
38314 several times in the course of compiling a function, and we don't want to
38315 slow things down too much or call target_reinit when it isn't safe. */
38316 if (fndecl && fndecl != rs6000_previous_fndecl)
38318 rs6000_previous_fndecl = fndecl;
38319 if (old_tree == new_tree)
38320 ;
38322 else if (new_tree && new_tree != target_option_default_node)
38324 cl_target_option_restore (&global_options,
38325 TREE_TARGET_OPTION (new_tree));
38326 if (TREE_TARGET_GLOBALS (new_tree))
38327 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38328 else
38329 TREE_TARGET_GLOBALS (new_tree)
38330 = save_target_globals_default_opts ();
38333 else if (old_tree && old_tree != target_option_default_node)
38335 new_tree = target_option_current_node;
38336 cl_target_option_restore (&global_options,
38337 TREE_TARGET_OPTION (new_tree));
38338 if (TREE_TARGET_GLOBALS (new_tree))
38339 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38340 else if (new_tree == target_option_default_node)
38341 restore_target_globals (&default_target_globals);
38342 else
38343 TREE_TARGET_GLOBALS (new_tree)
38344 = save_target_globals_default_opts ();
38350 /* Save the current options */
38352 static void
38353 rs6000_function_specific_save (struct cl_target_option *ptr,
38354 struct gcc_options *opts)
38356 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
38357 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
38360 /* Restore the current options */
38362 static void
38363 rs6000_function_specific_restore (struct gcc_options *opts,
38364 struct cl_target_option *ptr)
38367 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
38368 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
38369 (void) rs6000_option_override_internal (false);
38372 /* Print the current options */
38374 static void
38375 rs6000_function_specific_print (FILE *file, int indent,
38376 struct cl_target_option *ptr)
38378 rs6000_print_isa_options (file, indent, "Isa options set",
38379 ptr->x_rs6000_isa_flags);
38381 rs6000_print_isa_options (file, indent, "Isa options explicit",
38382 ptr->x_rs6000_isa_flags_explicit);
38385 /* Helper function to print the current isa or misc options on a line. */
38387 static void
38388 rs6000_print_options_internal (FILE *file,
38389 int indent,
38390 const char *string,
38391 HOST_WIDE_INT flags,
38392 const char *prefix,
38393 const struct rs6000_opt_mask *opts,
38394 size_t num_elements)
38396 size_t i;
38397 size_t start_column = 0;
38398 size_t cur_column;
38399 size_t max_column = 120;
38400 size_t prefix_len = strlen (prefix);
38401 size_t comma_len = 0;
38402 const char *comma = "";
38404 if (indent)
38405 start_column += fprintf (file, "%*s", indent, "");
38407 if (!flags)
38409 fprintf (file, DEBUG_FMT_S, string, "<none>");
38410 return;
38413 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
38415 /* Print the various mask options. */
38416 cur_column = start_column;
38417 for (i = 0; i < num_elements; i++)
38419 bool invert = opts[i].invert;
38420 const char *name = opts[i].name;
38421 const char *no_str = "";
38422 HOST_WIDE_INT mask = opts[i].mask;
38423 size_t len = comma_len + prefix_len + strlen (name);
38425 if (!invert)
38427 if ((flags & mask) == 0)
38429 no_str = "no-";
38430 len += sizeof ("no-") - 1;
38433 flags &= ~mask;
38436 else
38438 if ((flags & mask) != 0)
38440 no_str = "no-";
38441 len += sizeof ("no-") - 1;
38444 flags |= mask;
38447 cur_column += len;
38448 if (cur_column > max_column)
38450 fprintf (file, ", \\\n%*s", (int)start_column, "");
38451 cur_column = start_column + len;
38452 comma = "";
38455 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
38456 comma = ", ";
38457 comma_len = sizeof (", ") - 1;
38460 fputs ("\n", file);
38463 /* Helper function to print the current isa options on a line. */
38465 static void
38466 rs6000_print_isa_options (FILE *file, int indent, const char *string,
38467 HOST_WIDE_INT flags)
38469 rs6000_print_options_internal (file, indent, string, flags, "-m",
38470 &rs6000_opt_masks[0],
38471 ARRAY_SIZE (rs6000_opt_masks));
38474 static void
38475 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
38476 HOST_WIDE_INT flags)
38478 rs6000_print_options_internal (file, indent, string, flags, "",
38479 &rs6000_builtin_mask_names[0],
38480 ARRAY_SIZE (rs6000_builtin_mask_names));
38484 /* Hook to determine if one function can safely inline another. */
38486 static bool
38487 rs6000_can_inline_p (tree caller, tree callee)
38489 bool ret = false;
38490 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
38491 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
38493 /* If callee has no option attributes, then it is ok to inline. */
38494 if (!callee_tree)
38495 ret = true;
38497 /* If caller has no option attributes, but callee does, then it is not ok to
38498 inline. */
38499 else if (!caller_tree)
38500 ret = false;
38502 else
38504 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
38505 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
38507 /* Callee's options should be a subset of the caller's, i.e. a vsx function
38508 can inline an altivec function but a non-vsx function can't inline a
38509 vsx function. */
38510 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
38511 == callee_opts->x_rs6000_isa_flags)
38512 ret = true;
38515 if (TARGET_DEBUG_TARGET)
38516 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
38517 (DECL_NAME (caller)
38518 ? IDENTIFIER_POINTER (DECL_NAME (caller))
38519 : "<unknown>"),
38520 (DECL_NAME (callee)
38521 ? IDENTIFIER_POINTER (DECL_NAME (callee))
38522 : "<unknown>"),
38523 (ret ? "can" : "cannot"));
38525 return ret;
38528 /* Allocate a stack temp and fix up the address so it meets the particular
38529 memory requirements (either offsettable or REG+REG addressing). */
38531 rtx
38532 rs6000_allocate_stack_temp (machine_mode mode,
38533 bool offsettable_p,
38534 bool reg_reg_p)
38536 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
38537 rtx addr = XEXP (stack, 0);
38538 int strict_p = (reload_in_progress || reload_completed);
38540 if (!legitimate_indirect_address_p (addr, strict_p))
38542 if (offsettable_p
38543 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
38544 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38546 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
38547 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38550 return stack;
38553 /* Given a memory reference, if it does not use reg or reg+reg addressing,
38554 convert it to such a form, to deal with memory reference instructions
38555 like STFIWX that only take reg+reg addressing. */
38557 rtx
38558 rs6000_address_for_fpconvert (rtx x)
38560 int strict_p = (reload_in_progress || reload_completed);
38561 rtx addr;
38563 gcc_assert (MEM_P (x));
38564 addr = XEXP (x, 0);
38565 if (! legitimate_indirect_address_p (addr, strict_p)
38566 && ! legitimate_indexed_address_p (addr, strict_p))
38568 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
38570 rtx reg = XEXP (addr, 0);
38571 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
38572 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
38573 gcc_assert (REG_P (reg));
38574 emit_insn (gen_add3_insn (reg, reg, size_rtx));
38575 addr = reg;
38577 else if (GET_CODE (addr) == PRE_MODIFY)
38579 rtx reg = XEXP (addr, 0);
38580 rtx expr = XEXP (addr, 1);
38581 gcc_assert (REG_P (reg));
38582 gcc_assert (GET_CODE (expr) == PLUS);
38583 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
38584 addr = reg;
38587 x = replace_equiv_address (x, copy_addr_to_reg (addr));
38590 return x;
38593 /* Given a memory reference, if it is not in the form for altivec memory
38594 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
38595 convert to the altivec format. */
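/* The AND with -16 mirrors the hardware: lvx and stvx ignore the low
   four address bits, so e.g. an address of 0x1008 accesses the aligned
   16-byte block at 0x1000.  */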
38597 rtx
38598 rs6000_address_for_altivec (rtx x)
38600 gcc_assert (MEM_P (x));
38601 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
38603 rtx addr = XEXP (x, 0);
38604 int strict_p = (reload_in_progress || reload_completed);
38606 if (!legitimate_indexed_address_p (addr, strict_p)
38607 && !legitimate_indirect_address_p (addr, strict_p))
38608 addr = copy_to_mode_reg (Pmode, addr);
38610 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
38611 x = change_address (x, GET_MODE (x), addr);
38614 return x;
38617 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
38619 On the RS/6000, all integer constants are acceptable, most won't be valid
38620 for particular insns, though. Only easy FP constants are acceptable. */
38622 static bool
38623 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
38625 if (TARGET_ELF && tls_referenced_p (x))
38626 return false;
38628 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
38629 || GET_MODE (x) == VOIDmode
38630 || (TARGET_POWERPC64 && mode == DImode)
38631 || easy_fp_constant (x, mode)
38632 || easy_vector_constant (x, mode));
38636 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
38638 static bool
38639 chain_already_loaded (rtx_insn *last)
38641 for (; last != NULL; last = PREV_INSN (last))
38643 if (NONJUMP_INSN_P (last))
38645 rtx patt = PATTERN (last);
38647 if (GET_CODE (patt) == SET)
38649 rtx lhs = XEXP (patt, 0);
38651 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
38652 return true;
38656 return false;
38659 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
38661 void
38662 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38664 const bool direct_call_p
38665 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
38666 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
38667 rtx toc_load = NULL_RTX;
38668 rtx toc_restore = NULL_RTX;
38669 rtx func_addr;
38670 rtx abi_reg = NULL_RTX;
38671 rtx call[4];
38672 int n_call;
38673 rtx insn;
38675 /* Handle longcall attributes. */
38676 if (INTVAL (cookie) & CALL_LONG)
38677 func_desc = rs6000_longcall_ref (func_desc);
38679 /* Handle indirect calls. */
38680 if (GET_CODE (func_desc) != SYMBOL_REF
38681 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
38683 /* Save the TOC into its reserved slot before the call,
38684 and prepare to restore it after the call. */
38685 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
38686 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
38687 rtx stack_toc_mem = gen_frame_mem (Pmode,
38688 gen_rtx_PLUS (Pmode, stack_ptr,
38689 stack_toc_offset));
38690 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38691 gen_rtvec (1, stack_toc_offset),
38692 UNSPEC_TOCSLOT);
38693 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38695 /* Can we optimize saving the TOC in the prologue or
38696 do we need to do it at every call? */
38697 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38698 cfun->machine->save_toc_in_prologue = true;
38699 else
38701 MEM_VOLATILE_P (stack_toc_mem) = 1;
38702 emit_move_insn (stack_toc_mem, toc_reg);
38705 if (DEFAULT_ABI == ABI_ELFv2)
38707 /* A function pointer in the ELFv2 ABI is just a plain address, but
38708 the ABI requires it to be loaded into r12 before the call. */
38709 func_addr = gen_rtx_REG (Pmode, 12);
38710 emit_move_insn (func_addr, func_desc);
38711 abi_reg = func_addr;
38713 else
38715 /* A function pointer under AIX is a pointer to a data area whose
38716 first word contains the actual address of the function, whose
38717 second word contains a pointer to its TOC, and whose third word
38718 contains a value to place in the static chain register (r11).
38719 Note that if we load the static chain, our "trampoline" need
38720 not have any executable code. */
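/* A sketch of the descriptor layout assumed here (offsets scale with
   GET_MODE_SIZE (Pmode); shown for 64-bit):
     desc+0   entry-point address   (loaded into func_addr below)
     desc+8   callee's TOC pointer  (loaded just before the call)
     desc+16  static chain value    (loaded into r11 when needed)  */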
38722 /* Load up address of the actual function. */
38723 func_desc = force_reg (Pmode, func_desc);
38724 func_addr = gen_reg_rtx (Pmode);
38725 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38727 /* Prepare to load the TOC of the called function. Note that the
38728 TOC load must happen immediately before the actual call so
38729 that unwinding the TOC registers works correctly. See the
38730 comment in frob_update_context. */
38731 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38732 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38733 gen_rtx_PLUS (Pmode, func_desc,
38734 func_toc_offset));
38735 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38737 /* If we have a static chain, load it up. But, if the call was
38738 originally direct, the 3rd word has not been written since no
38739 trampoline has been built, so we ought not to load it, lest we
38740 overwrite a static chain value.  */
38741 if (!direct_call_p
38742 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38743 && !chain_already_loaded (get_current_sequence ()->next->last))
38745 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38746 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38747 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38748 gen_rtx_PLUS (Pmode, func_desc,
38749 func_sc_offset));
38750 emit_move_insn (sc_reg, func_sc_mem);
38751 abi_reg = sc_reg;
38755 else
38757 /* Direct calls use the TOC: for local calls, the callee will
38758 assume the TOC register is set; for non-local calls, the
38759 PLT stub needs the TOC register. */
38760 abi_reg = toc_reg;
38761 func_addr = func_desc;
38764 /* Create the call. */
38765 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38766 if (value != NULL_RTX)
38767 call[0] = gen_rtx_SET (value, call[0]);
38768 n_call = 1;
38770 if (toc_load)
38771 call[n_call++] = toc_load;
38772 if (toc_restore)
38773 call[n_call++] = toc_restore;
38775 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38777 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38778 insn = emit_call_insn (insn);
38780 /* Mention all registers defined by the ABI to hold information
38781 as uses in CALL_INSN_FUNCTION_USAGE. */
38782 if (abi_reg)
38783 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38786 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38788 void
38789 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38791 rtx call[2];
38792 rtx insn;
38794 gcc_assert (INTVAL (cookie) == 0);
38796 /* Create the call. */
38797 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38798 if (value != NULL_RTX)
38799 call[0] = gen_rtx_SET (value, call[0]);
38801 call[1] = simple_return_rtx;
38803 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38804 insn = emit_call_insn (insn);
38806 /* Note use of the TOC register. */
38807 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38810 /* Return whether we need to always update the saved TOC pointer when we update
38811 the stack pointer. */
38813 static bool
38814 rs6000_save_toc_in_prologue_p (void)
38816 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38819 #ifdef HAVE_GAS_HIDDEN
38820 # define USE_HIDDEN_LINKONCE 1
38821 #else
38822 # define USE_HIDDEN_LINKONCE 0
38823 #endif
38825 /* Fills in the label name that should be used for a 476 link stack thunk. */
38827 void
38828 get_ppc476_thunk_name (char name[32])
38830 gcc_assert (TARGET_LINK_STACK);
38832 if (USE_HIDDEN_LINKONCE)
38833 sprintf (name, "__ppc476.get_thunk");
38834 else
38835 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38838 /* This function emits the simple thunk routine that is used to preserve
38839 the link stack on the 476 cpu. */
38841 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38842 static void
38843 rs6000_code_end (void)
38845 char name[32];
38846 tree decl;
38848 if (!TARGET_LINK_STACK)
38849 return;
38851 get_ppc476_thunk_name (name);
38853 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38854 build_function_type_list (void_type_node, NULL_TREE));
38855 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38856 NULL_TREE, void_type_node);
38857 TREE_PUBLIC (decl) = 1;
38858 TREE_STATIC (decl) = 1;
38860 #if RS6000_WEAK
38861 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
38863 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38864 targetm.asm_out.unique_section (decl, 0);
38865 switch_to_section (get_named_section (decl, NULL, 0));
38866 DECL_WEAK (decl) = 1;
38867 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38868 targetm.asm_out.globalize_label (asm_out_file, name);
38869 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38870 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38872 else
38873 #endif
38875 switch_to_section (text_section);
38876 ASM_OUTPUT_LABEL (asm_out_file, name);
38879 DECL_INITIAL (decl) = make_node (BLOCK);
38880 current_function_decl = decl;
38881 allocate_struct_function (decl, false);
38882 init_function_start (decl);
38883 first_function_block_is_cold = false;
38884 /* Make sure unwind info is emitted for the thunk if needed. */
38885 final_start_function (emit_barrier (), asm_out_file, 1);
38887 fputs ("\tblr\n", asm_out_file);
38889 final_end_function ();
38890 init_insn_lengths ();
38891 free_after_compilation (cfun);
38892 set_cfun (NULL);
38893 current_function_decl = NULL;
38896 /* Add r30 to hard reg set if the prologue sets it up and it is not
38897 pic_offset_table_rtx. */
38899 static void
38900 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38902 if (!TARGET_SINGLE_PIC_BASE
38903 && TARGET_TOC
38904 && TARGET_MINIMAL_TOC
38905 && get_pool_size () != 0)
38906 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38907 if (cfun->machine->split_stack_argp_used)
38908 add_to_hard_reg_set (&set->set, Pmode, 12);
38912 /* Helper function for rs6000_split_logical to emit a logical instruction after
38913 splitting the operation into single GPR registers.
38915 DEST is the destination register.
38916 OP1 and OP2 are the input source registers.
38917 CODE is the base operation (AND, IOR, XOR, NOT).
38918 MODE is the machine mode.
38919 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38920 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38921 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
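/* E.g., with CODE = AND and COMPLEMENT_OP2_P set, this emits
   (set dest (and (not op2) op1)); the inverted arm is swapped into
   the first position to keep the RTL canonical (a sketch).  */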
38923 static void
38924 rs6000_split_logical_inner (rtx dest,
38925 rtx op1,
38926 rtx op2,
38927 enum rtx_code code,
38928 machine_mode mode,
38929 bool complement_final_p,
38930 bool complement_op1_p,
38931 bool complement_op2_p)
38933 rtx bool_rtx;
38935 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38936 if (op2 && GET_CODE (op2) == CONST_INT
38937 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38938 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38940 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38941 HOST_WIDE_INT value = INTVAL (op2) & mask;
38943 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38944 if (code == AND)
38946 if (value == 0)
38948 emit_insn (gen_rtx_SET (dest, const0_rtx));
38949 return;
38952 else if (value == mask)
38954 if (!rtx_equal_p (dest, op1))
38955 emit_insn (gen_rtx_SET (dest, op1));
38956 return;
38960 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38961 into separate ORI/ORIS or XORI/XORIS instructions.  */
38962 else if (code == IOR || code == XOR)
38964 if (value == 0)
38966 if (!rtx_equal_p (dest, op1))
38967 emit_insn (gen_rtx_SET (dest, op1));
38968 return;
38973 if (code == AND && mode == SImode
38974 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38976 emit_insn (gen_andsi3 (dest, op1, op2));
38977 return;
38980 if (complement_op1_p)
38981 op1 = gen_rtx_NOT (mode, op1);
38983 if (complement_op2_p)
38984 op2 = gen_rtx_NOT (mode, op2);
38986 /* For canonical RTL, if only one arm is inverted it is the first. */
38987 if (!complement_op1_p && complement_op2_p)
38988 std::swap (op1, op2);
38990 bool_rtx = ((code == NOT)
38991 ? gen_rtx_NOT (mode, op1)
38992 : gen_rtx_fmt_ee (code, mode, op1, op2));
38994 if (complement_final_p)
38995 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38997 emit_insn (gen_rtx_SET (dest, bool_rtx));
39000 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
39001 operations are split immediately during RTL generation to allow for more
39002 optimizations of the AND/IOR/XOR.
39004 OPERANDS is an array containing the destination and two input operands.
39005 CODE is the base operation (AND, IOR, XOR, NOT).
39007 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
39008 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
39009 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
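/* For example (a sketch, assuming a 32-bit target), splitting
     (set (reg:DI 3) (and:DI (reg:DI 3) (const_int 0xff)))
   yields an SImode move of 0 into the high half and an SImode AND of
   the low half with 0xff, so each half can be optimized separately.  */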
39013 static void
39014 rs6000_split_logical_di (rtx operands[3],
39015 enum rtx_code code,
39016 bool complement_final_p,
39017 bool complement_op1_p,
39018 bool complement_op2_p)
39020 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
39021 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
39022 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
39023 enum hi_lo { hi = 0, lo = 1 };
39024 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
39025 size_t i;
39027 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
39028 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
39029 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
39030 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
39032 if (code == NOT)
39033 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
39034 else
39036 if (GET_CODE (operands[2]) != CONST_INT)
39038 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
39039 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
39041 else
39043 HOST_WIDE_INT value = INTVAL (operands[2]);
39044 HOST_WIDE_INT value_hi_lo[2];
39046 gcc_assert (!complement_final_p);
39047 gcc_assert (!complement_op1_p);
39048 gcc_assert (!complement_op2_p);
39050 value_hi_lo[hi] = value >> 32;
39051 value_hi_lo[lo] = value & lower_32bits;
39053 for (i = 0; i < 2; i++)
39055 HOST_WIDE_INT sub_value = value_hi_lo[i];
39057 if (sub_value & sign_bit)
39058 sub_value |= upper_32bits;
39060 op2_hi_lo[i] = GEN_INT (sub_value);
39062 /* If this is an AND instruction, check to see if we need to load
39063 the value in a register. */
39064 if (code == AND && sub_value != -1 && sub_value != 0
39065 && !and_operand (op2_hi_lo[i], SImode))
39066 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
39071 for (i = 0; i < 2; i++)
39073 /* Split large IOR/XOR operations. */
39074 if ((code == IOR || code == XOR)
39075 && GET_CODE (op2_hi_lo[i]) == CONST_INT
39076 && !complement_final_p
39077 && !complement_op1_p
39078 && !complement_op2_p
39079 && !logical_const_operand (op2_hi_lo[i], SImode))
39081 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
39082 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
39083 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
39084 rtx tmp = gen_reg_rtx (SImode);
39086 /* Make sure the constant is sign extended. */
39087 if ((hi_16bits & sign_bit) != 0)
39088 hi_16bits |= upper_32bits;
39090 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
39091 code, SImode, false, false, false);
39093 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
39094 code, SImode, false, false, false);
39096 else
39097 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
39098 code, SImode, complement_final_p,
39099 complement_op1_p, complement_op2_p);
39102 return;
39105 /* Split the insns that make up boolean operations operating on multiple GPR
39106 registers. The boolean MD patterns ensure that the inputs either are
39107 exactly the same as the output registers, or there is no overlap.
39109 OPERANDS is an array containing the destination and two input operands.
39110 CODE is the base operation (AND, IOR, XOR, NOT).
39111 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
39112 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
39113 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
39115 void
39116 rs6000_split_logical (rtx operands[3],
39117 enum rtx_code code,
39118 bool complement_final_p,
39119 bool complement_op1_p,
39120 bool complement_op2_p)
39122 machine_mode mode = GET_MODE (operands[0]);
39123 machine_mode sub_mode;
39124 rtx op0, op1, op2;
39125 int sub_size, regno0, regno1, nregs, i;
39127 /* If this is DImode, use the specialized version that can run before
39128 register allocation. */
39129 if (mode == DImode && !TARGET_POWERPC64)
39131 rs6000_split_logical_di (operands, code, complement_final_p,
39132 complement_op1_p, complement_op2_p);
39133 return;
39136 op0 = operands[0];
39137 op1 = operands[1];
39138 op2 = (code == NOT) ? NULL_RTX : operands[2];
39139 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
39140 sub_size = GET_MODE_SIZE (sub_mode);
39141 regno0 = REGNO (op0);
39142 regno1 = REGNO (op1);
39144 gcc_assert (reload_completed);
39145 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
39146 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
39148 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
39149 gcc_assert (nregs > 1);
39151 if (op2 && REG_P (op2))
39152 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
39154 for (i = 0; i < nregs; i++)
39156 int offset = i * sub_size;
39157 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
39158 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
39159 rtx sub_op2 = ((code == NOT)
39160 ? NULL_RTX
39161 : simplify_subreg (sub_mode, op2, mode, offset));
39163 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
39164 complement_final_p, complement_op1_p,
39165 complement_op2_p);
39168 return;
39172 /* Return true if the peephole2 can combine a load involving a combination of
39173 an addis instruction and a load with an offset that can be fused together on
39174 a power8. */
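/* For example (an illustrative sketch), the peephole2 can match
     addis 9,2,sym@toc@ha
     lwz   9,sym@toc@l(9)
   where the addis result is both the load's base register and the
   register being loaded, the form power8 can fuse.  */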
39176 bool
39177 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
39178 rtx addis_value, /* addis value. */
39179 rtx target, /* target register that is loaded. */
39180 rtx mem) /* bottom part of the memory addr. */
39182 rtx addr;
39183 rtx base_reg;
39185 /* Validate arguments. */
39186 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
39187 return false;
39189 if (!base_reg_operand (target, GET_MODE (target)))
39190 return false;
39192 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
39193 return false;
39195 /* Allow sign/zero extension. */
39196 if (GET_CODE (mem) == ZERO_EXTEND
39197 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
39198 mem = XEXP (mem, 0);
39200 if (!MEM_P (mem))
39201 return false;
39203 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
39204 return false;
39206 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39207 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
39208 return false;
39210 /* Validate that the register used to load the high value is either the
39211 register being loaded, or we can safely replace its use.
39213 This function is only called from the peephole2 pass and we assume that
39214 there are 2 instructions in the peephole (addis and load), so we want to
39215 check that the target register is not used in the memory address and that
39216 the register holding the addis result is dead after the peephole.  */
39217 if (REGNO (addis_reg) != REGNO (target))
39219 if (reg_mentioned_p (target, mem))
39220 return false;
39222 if (!peep2_reg_dead_p (2, addis_reg))
39223 return false;
39225 /* If the target register being loaded is the stack pointer, we must
39226 avoid loading any other value into it, even temporarily. */
39227 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
39228 return false;
39231 base_reg = XEXP (addr, 0);
39232 return REGNO (addis_reg) == REGNO (base_reg);
39235 /* During the peephole2 pass, adjust and expand the insns for a load fusion
39236 sequence. We adjust the addis register to use the target register. If the
39237 load sign-extends, we adjust the code to do a zero-extending load, and an
39238 explicit sign extension later, since the fusion only covers zero-extending
39239 loads.
39241 The operands are:
39242 operands[0] register set with addis (to be replaced with target)
39243 operands[1] value set via addis
39244 operands[2] target register being loaded
39245 operands[3] D-form memory reference using operands[0]. */
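/* A sketch of the sign-extension handling: a matched sign-extending
   HImode load is emitted as a fused zero-extending load (lhz) into
   TARGET, followed by a separate sign_extend of TARGET's low part.  */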
39247 void
39248 expand_fusion_gpr_load (rtx *operands)
39250 rtx addis_value = operands[1];
39251 rtx target = operands[2];
39252 rtx orig_mem = operands[3];
39253 rtx new_addr, new_mem, orig_addr, offset;
39254 enum rtx_code plus_or_lo_sum;
39255 machine_mode target_mode = GET_MODE (target);
39256 machine_mode extend_mode = target_mode;
39257 machine_mode ptr_mode = Pmode;
39258 enum rtx_code extend = UNKNOWN;
39260 if (GET_CODE (orig_mem) == ZERO_EXTEND
39261 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
39263 extend = GET_CODE (orig_mem);
39264 orig_mem = XEXP (orig_mem, 0);
39265 target_mode = GET_MODE (orig_mem);
39268 gcc_assert (MEM_P (orig_mem));
39270 orig_addr = XEXP (orig_mem, 0);
39271 plus_or_lo_sum = GET_CODE (orig_addr);
39272 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39274 offset = XEXP (orig_addr, 1);
39275 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39276 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39278 if (extend != UNKNOWN)
39279 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
39281 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39282 UNSPEC_FUSION_GPR);
39283 emit_insn (gen_rtx_SET (target, new_mem));
39285 if (extend == SIGN_EXTEND)
39287 int sub_off = ((BYTES_BIG_ENDIAN)
39288 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
39289 : 0);
39290 rtx sign_reg
39291 = simplify_subreg (target_mode, target, extend_mode, sub_off);
39293 emit_insn (gen_rtx_SET (target,
39294 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
39297 return;
39300 /* Emit the addis instruction that will be part of a fused instruction
39301 sequence. */
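/* Depending on ADDIS_VALUE, this prints e.g. "lis %0,%v1" for an
   L-satisfying constant, "addis %0,%1,%v2" for a reg-plus-constant,
   or "addis %0,%2,%1@toc@ha" for a TOC-relative high part on ELF
   (see the cases below).  */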
39303 void
39304 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
39305 const char *mode_name)
39307 rtx fuse_ops[10];
39308 char insn_template[80];
39309 const char *addis_str = NULL;
39310 const char *comment_str = ASM_COMMENT_START;
39312 if (*comment_str == ' ')
39313 comment_str++;
39315 /* Emit the addis instruction. */
39316 fuse_ops[0] = target;
39317 if (satisfies_constraint_L (addis_value))
39319 fuse_ops[1] = addis_value;
39320 addis_str = "lis %0,%v1";
39323 else if (GET_CODE (addis_value) == PLUS)
39325 rtx op0 = XEXP (addis_value, 0);
39326 rtx op1 = XEXP (addis_value, 1);
39328 if (REG_P (op0) && CONST_INT_P (op1)
39329 && satisfies_constraint_L (op1))
39331 fuse_ops[1] = op0;
39332 fuse_ops[2] = op1;
39333 addis_str = "addis %0,%1,%v2";
39337 else if (GET_CODE (addis_value) == HIGH)
39339 rtx value = XEXP (addis_value, 0);
39340 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
39342 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
39343 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
39344 if (TARGET_ELF)
39345 addis_str = "addis %0,%2,%1@toc@ha";
39347 else if (TARGET_XCOFF)
39348 addis_str = "addis %0,%1@u(%2)";
39350 else
39351 gcc_unreachable ();
39354 else if (GET_CODE (value) == PLUS)
39356 rtx op0 = XEXP (value, 0);
39357 rtx op1 = XEXP (value, 1);
39359 if (GET_CODE (op0) == UNSPEC
39360 && XINT (op0, 1) == UNSPEC_TOCREL
39361 && CONST_INT_P (op1))
39363 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
39364 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
39365 fuse_ops[3] = op1;
39366 if (TARGET_ELF)
39367 addis_str = "addis %0,%2,%1+%3@toc@ha";
39369 else if (TARGET_XCOFF)
39370 addis_str = "addis %0,%1+%3@u(%2)";
39372 else
39373 gcc_unreachable ();
39377 else if (satisfies_constraint_L (value))
39379 fuse_ops[1] = value;
39380 addis_str = "lis %0,%v1";
39383 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
39385 fuse_ops[1] = value;
39386 addis_str = "lis %0,%1@ha";
39390 if (!addis_str)
39391 fatal_insn ("Could not generate addis value for fusion", addis_value);
39393 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
39394 comment, mode_name);
39395 output_asm_insn (insn_template, fuse_ops);
39398 /* Emit a D-form load or store instruction that is the second instruction
39399 of a fusion sequence. */
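/* For a simple CONST_INT offset the template is "<insn_str> %0,%2(%1)",
   so e.g. insn_str "lwz" could come out as "lwz 9,8(10)" after operand
   substitution (an illustrative sketch).  */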
39401 void
39402 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
39403 const char *insn_str)
39405 rtx fuse_ops[10];
39406 char insn_template[80];
39408 fuse_ops[0] = load_store_reg;
39409 fuse_ops[1] = addis_reg;
39411 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
39413 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
39414 fuse_ops[2] = offset;
39415 output_asm_insn (insn_template, fuse_ops);
39418 else if (GET_CODE (offset) == UNSPEC
39419 && XINT (offset, 1) == UNSPEC_TOCREL)
39421 if (TARGET_ELF)
39422 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
39424 else if (TARGET_XCOFF)
39425 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39427 else
39428 gcc_unreachable ();
39430 fuse_ops[2] = XVECEXP (offset, 0, 0);
39431 output_asm_insn (insn_template, fuse_ops);
39434 else if (GET_CODE (offset) == PLUS
39435 && GET_CODE (XEXP (offset, 0)) == UNSPEC
39436 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
39437 && CONST_INT_P (XEXP (offset, 1)))
39439 rtx tocrel_unspec = XEXP (offset, 0);
39440 if (TARGET_ELF)
39441 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
39443 else if (TARGET_XCOFF)
39444 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
39446 else
39447 gcc_unreachable ();
39449 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
39450 fuse_ops[3] = XEXP (offset, 1);
39451 output_asm_insn (insn_template, fuse_ops);
39454 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
39456 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39458 fuse_ops[2] = offset;
39459 output_asm_insn (insn_template, fuse_ops);
39462 else
39463 fatal_insn ("Unable to generate load/store offset for fusion", offset);
39465 return;
39468 /* Wrap a TOC address that can be fused to indicate that special fusion
39469 processing is needed. */
39471 rtx
39472 fusion_wrap_memory_address (rtx old_mem)
39474 rtx old_addr = XEXP (old_mem, 0);
39475 rtvec v = gen_rtvec (1, old_addr);
39476 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
39477 return replace_equiv_address_nv (old_mem, new_addr, false);
39480 /* Given an address, convert it into the addis and load offset parts. Addresses
39481 created during the peephole2 process look like:
39482 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
39483 (unspec [(...)] UNSPEC_TOCREL))
39485 Addresses created via toc fusion look like:
39486 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */
39488 static void
39489 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
39491 rtx hi, lo;
39493 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
39495 lo = XVECEXP (addr, 0, 0);
39496 hi = gen_rtx_HIGH (Pmode, lo);
39498 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
39500 hi = XEXP (addr, 0);
39501 lo = XEXP (addr, 1);
39503 else
39504 gcc_unreachable ();
39506 *p_hi = hi;
39507 *p_lo = lo;
39510 /* Return a string to fuse an addis instruction with a GPR load into the
39511 same register that the addis instruction set.  The address that is used
39512 is the logical address that was formed during peephole2:
39513 (lo_sum (high) (low-part))
39515 Or the address is the TOC address that is wrapped before register allocation:
39516 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
39518 The code is complicated, so we call output_asm_insn directly, and just
39519 return "". */
39521 const char *
39522 emit_fusion_gpr_load (rtx target, rtx mem)
39524 rtx addis_value;
39525 rtx addr;
39526 rtx load_offset;
39527 const char *load_str = NULL;
39528 const char *mode_name = NULL;
39529 machine_mode mode;
39531 if (GET_CODE (mem) == ZERO_EXTEND)
39532 mem = XEXP (mem, 0);
39534 gcc_assert (REG_P (target) && MEM_P (mem));
39536 addr = XEXP (mem, 0);
39537 fusion_split_address (addr, &addis_value, &load_offset);
39539 /* Now emit the load instruction to the same register. */
39540 mode = GET_MODE (mem);
39541 switch (mode)
39543 case QImode:
39544 mode_name = "char";
39545 load_str = "lbz";
39546 break;
39548 case HImode:
39549 mode_name = "short";
39550 load_str = "lhz";
39551 break;
39553 case SImode:
39554 case SFmode:
39555 mode_name = (mode == SFmode) ? "float" : "int";
39556 load_str = "lwz";
39557 break;
39559 case DImode:
39560 case DFmode:
39561 gcc_assert (TARGET_POWERPC64);
39562 mode_name = (mode == DFmode) ? "double" : "long";
39563 load_str = "ld";
39564 break;
39566 default:
39567 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
39570 /* Emit the addis instruction. */
39571 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
39573 /* Emit the D-form load instruction. */
39574 emit_fusion_load_store (target, target, load_offset, load_str);
39576 return "";
39580 /* Return true if the peephole2 can combine a load/store involving a
39581 combination of an addis instruction and the memory operation. This was
39582 added to the ISA 3.0 (power9) hardware. */
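/* For example (a sketch), with ISA 3.0 the pair
     addis 9,2,sym@toc@ha
     lfd   0,sym@toc@l(9)
   can fuse even though the destination is an FPR that differs from
   the addis target, unlike the power8 GPR-only form above.  */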
39584 bool
39585 fusion_p9_p (rtx addis_reg, /* register set via addis. */
39586 rtx addis_value, /* addis value. */
39587 rtx dest, /* destination (memory or register). */
39588 rtx src) /* source (register or memory). */
39590 rtx addr, mem, offset;
39591 machine_mode mode = GET_MODE (src);
39593 /* Validate arguments. */
39594 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
39595 return false;
39597 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
39598 return false;
39600 /* Ignore extend operations that are part of the load. */
39601 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
39602 src = XEXP (src, 0);
39604 /* Test for memory<-register or register<-memory. */
39605 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
39607 if (!MEM_P (dest))
39608 return false;
39610 mem = dest;
39613 else if (MEM_P (src))
39615 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
39616 return false;
39618 mem = src;
39621 else
39622 return false;
39624 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39625 if (GET_CODE (addr) == PLUS)
39627 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39628 return false;
39630 return satisfies_constraint_I (XEXP (addr, 1));
39633 else if (GET_CODE (addr) == LO_SUM)
39635 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39636 return false;
39638 offset = XEXP (addr, 1);
39639 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
39640 return small_toc_ref (offset, GET_MODE (offset));
39642 else if (TARGET_ELF && !TARGET_POWERPC64)
39643 return CONSTANT_P (offset);
39646 return false;
39649 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39650 load sequence.
39652 The operands are:
39653 operands[0] register set with addis
39654 operands[1] value set via addis
39655 operands[2] target register being loaded
39656 operands[3] D-form memory reference using operands[0].
39658 This is similar to the fusion introduced with power8, except it applies to
39659 both loads and stores and does not require the result register to be the
39660 same as the base register.  At the moment, we only do this if the register
39661 set by the addis is dead.  */
39663 void
39664 expand_fusion_p9_load (rtx *operands)
39666 rtx tmp_reg = operands[0];
39667 rtx addis_value = operands[1];
39668 rtx target = operands[2];
39669 rtx orig_mem = operands[3];
39670 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
39671 enum rtx_code plus_or_lo_sum;
39672 machine_mode target_mode = GET_MODE (target);
39673 machine_mode extend_mode = target_mode;
39674 machine_mode ptr_mode = Pmode;
39675 enum rtx_code extend = UNKNOWN;
39677 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
39679 extend = GET_CODE (orig_mem);
39680 orig_mem = XEXP (orig_mem, 0);
39681 target_mode = GET_MODE (orig_mem);
39684 gcc_assert (MEM_P (orig_mem));
39686 orig_addr = XEXP (orig_mem, 0);
39687 plus_or_lo_sum = GET_CODE (orig_addr);
39688 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39690 offset = XEXP (orig_addr, 1);
39691 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39692 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39694 if (extend != UNKNOWN)
39695 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
39697 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39698 UNSPEC_FUSION_P9);
39700 set = gen_rtx_SET (target, new_mem);
39701 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39702 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39703 emit_insn (insn);
39705 return;
39708 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39709 store sequence.
39711 The operands are:
39712 operands[0] register set with addis
39713 operands[1] value set via addis
39714 operands[2] target D-form memory being stored to
39715 operands[3] register being stored
39717 This is similar to the fusion introduced with power8, except it applies to
39718 both loads and stores and does not require the result register to be the
39719 same as the base register.  At the moment, we only do this if the register
39720 set by the addis is dead.  */
39722 void
39723 expand_fusion_p9_store (rtx *operands)
39725 rtx tmp_reg = operands[0];
39726 rtx addis_value = operands[1];
39727 rtx orig_mem = operands[2];
39728 rtx src = operands[3];
39729 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39730 enum rtx_code plus_or_lo_sum;
39731 machine_mode target_mode = GET_MODE (orig_mem);
39732 machine_mode ptr_mode = Pmode;
39734 gcc_assert (MEM_P (orig_mem));
39736 orig_addr = XEXP (orig_mem, 0);
39737 plus_or_lo_sum = GET_CODE (orig_addr);
39738 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39740 offset = XEXP (orig_addr, 1);
39741 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39742 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39744 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39745 UNSPEC_FUSION_P9);
39747 set = gen_rtx_SET (new_mem, new_src);
39748 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39749 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39750 emit_insn (insn);
39752 return;
39755 /* Return a string to fuse an addis instruction with a load using extended
39756 fusion. The address that is used is the logical address that was formed
39757 during peephole2: (lo_sum (high) (low-part))
39759 The code is complicated, so we call output_asm_insn directly, and just
39760 return "". */
39762 const char *
39763 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39765 machine_mode mode = GET_MODE (reg);
39766 rtx hi;
39767 rtx lo;
39768 rtx addr;
39769 const char *load_string;
39770 int r;
39772 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39774 mem = XEXP (mem, 0);
39775 mode = GET_MODE (mem);
39778 if (GET_CODE (reg) == SUBREG)
39780 gcc_assert (SUBREG_BYTE (reg) == 0);
39781 reg = SUBREG_REG (reg);
39784 if (!REG_P (reg))
39785 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39787 r = REGNO (reg);
39788 if (FP_REGNO_P (r))
39790 if (mode == SFmode)
39791 load_string = "lfs";
39792 else if (mode == DFmode || mode == DImode)
39793 load_string = "lfd";
39794 else
39795 gcc_unreachable ();
39797 else if (INT_REGNO_P (r))
39799 switch (mode)
39801 case QImode:
39802 load_string = "lbz";
39803 break;
39804 case HImode:
39805 load_string = "lhz";
39806 break;
39807 case SImode:
39808 case SFmode:
39809 load_string = "lwz";
39810 break;
39811 case DImode:
39812 case DFmode:
39813 if (!TARGET_POWERPC64)
39814 gcc_unreachable ();
39815 load_string = "ld";
39816 break;
39817 default:
39818 gcc_unreachable ();
39821 else
39822 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39824 if (!MEM_P (mem))
39825 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39827 addr = XEXP (mem, 0);
39828 fusion_split_address (addr, &hi, &lo);
39830 /* Emit the addis instruction. */
39831 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
39833 /* Emit the D-form load instruction. */
39834 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39836 return "";
39839 /* Return a string to fuse an addis instruction with a store using extended
39840 fusion. The address that is used is the logical address that was formed
39841 during peephole2: (lo_sum (high) (low-part))
39843 The code is complicated, so we call output_asm_insn directly, and just
39844 return "". */
39846 const char *
39847 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39849 machine_mode mode = GET_MODE (reg);
39850 rtx hi;
39851 rtx lo;
39852 rtx addr;
39853 const char *store_string;
39854 int r;
39856 if (GET_CODE (reg) == SUBREG)
39858 gcc_assert (SUBREG_BYTE (reg) == 0);
39859 reg = SUBREG_REG (reg);
39862 if (!REG_P (reg))
39863 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39865 r = REGNO (reg);
39866 if (FP_REGNO_P (r))
39868 if (mode == SFmode)
39869 store_string = "stfs";
39870 else if (mode == DFmode)
39871 store_string = "stfd";
39872 else
39873 gcc_unreachable ();
39875 else if (INT_REGNO_P (r))
39877 switch (mode)
39879 case QImode:
39880 store_string = "stb";
39881 break;
39882 case HImode:
39883 store_string = "sth";
39884 break;
39885 case SImode:
39886 case SFmode:
39887 store_string = "stw";
39888 break;
39889 case DImode:
39890 case DFmode:
39891 if (!TARGET_POWERPC64)
39892 gcc_unreachable ();
39893 store_string = "std";
39894 break;
39895 default:
39896 gcc_unreachable ();
39899 else
39900 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39902 if (!MEM_P (mem))
39903 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39905 addr = XEXP (mem, 0);
39906 fusion_split_address (addr, &hi, &lo);
39908 /* Emit the addis instruction. */
39909 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39911 /* Emit the D-form store instruction.  */
39912 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39914 return "";
39918 /* Analyze vector computations and remove unnecessary doubleword
39919 swaps (xxswapdi instructions). This pass is performed only
39920 for little-endian VSX code generation.
39922 For this specific case, loads and stores of 4x32 and 2x64 vectors
39923 are inefficient.  These are implemented using the lxvd2x and
39924 stxvd2x instructions, which invert the order of doublewords in
39925 a vector register. Thus the code generation inserts an xxswapdi
39926 after each such load, and prior to each such store. (For spill
39927 code after register assignment, an additional xxswapdi is inserted
39928 following each store in order to return a hard register to its
39929 unpermuted value.)
39931 The extra xxswapdi instructions reduce performance. This can be
39932 particularly bad for vectorized code. The purpose of this pass
39933 is to reduce the number of xxswapdi instructions required for
39934 correctness.
39936 The primary insight is that much code that operates on vectors
39937 does not care about the relative order of elements in a register,
39938 so long as the correct memory order is preserved. If we have
39939 a computation where all input values are provided by lxvd2x/xxswapdi
39940 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
39941 and all intermediate computations are pure SIMD (independent of
39942 element order), then all the xxswapdi's associated with the loads
39943 and stores may be removed.
39945 This pass uses some of the infrastructure and logical ideas from
39946 the "web" pass in web.c. We create maximal webs of computations
39947 fitting the description above using union-find. Each such web is
39948 then optimized by removing its unnecessary xxswapdi instructions.
39950 The pass is placed prior to global optimization so that we can
39951 perform the optimization in the safest and simplest way possible;
39952 that is, by replacing each xxswapdi insn with a register copy insn.
39953 Subsequent forward propagation will remove copies where possible.
39955 There are some operations sensitive to element order that we can
39956 still allow, provided we modify them appropriately.
39957 These include CONST_VECTORs, for which we must swap the first and
39958 second halves of the constant vector; and SUBREGs, for which we
39959 must adjust the byte offset to account for the swapped doublewords.
39960 A remaining opportunity would be non-immediate-form splats, for
39961 which we should adjust the selected lane of the input. We should
39962 also make code generation adjustments for sum-across operations,
39963 since this is a common vectorizer reduction.
39965 Because we run prior to the first split, we can see loads and stores
39966 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39967 vector loads and stores that have not yet been split into a permuting
39968 load/store and a swap. (One way this can happen is with a builtin
39969 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39970 than deleting a swap, we convert the load/store into a permuting
39971 load/store (which effectively removes the swap). */
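/* For example (a sketch): on little-endian, a vanilla vector load is
   expanded as an lxvd2x followed by a doubleword swap, and a store as
   a doubleword swap followed by an stxvd2x.  When every insn in a web
   is element-order insensitive, all of those swaps can be removed.  */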
39973 /* Notes on Permutes
39975 We do not currently handle computations that contain permutes. There
39976 is a general transformation that can be performed correctly, but it
39977 may introduce more expensive code than it replaces. To handle these
39978 would require a cost model to determine when to perform the optimization.
39979 This commentary records how this could be done if desired.
39981 The most general permute is something like this (example for V16QI):
39983 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39984 (parallel [(const_int a0) (const_int a1)
39985 ...
39986 (const_int a14) (const_int a15)]))
39988 where a0,...,a15 are in [0,31] and select elements from op1 and op2
39989 to produce in the result.
39991 Regardless of mode, we can convert the PARALLEL to a mask of 16
39992 byte-element selectors. Let's call this M, with M[i] representing
39993 the ith byte-element selector value. Then if we swap doublewords
39994 throughout the computation, we can get correct behavior by replacing
39995 M with M' as follows:
39997 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39998 { ((M[i]+8)%16)+16 : M[i] in [16,31]
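For instance, M[i] = 3 maps to M'[i] = (3+8)%16 = 11, while
M[i] = 19 maps to M'[i] = ((19+8)%16)+16 = 27.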
40000 This seems promising at first, since we are just replacing one mask
40001 with another. But certain masks are preferable to others. If M
40002 is a mask that matches a vmrghh pattern, for example, M' certainly
40003 will not. Instead of a single vmrghh, we would generate a load of
40004 M' and a vperm.  So we would need to know how many xxswapdi's we can
40005 remove as a result of this transformation to determine if it's
40006 profitable; and preferably the logic would need to be aware of all
40007 the special preferable masks.
40009 Another form of permute is an UNSPEC_VPERM, in which the mask is
40010 already in a register. In some cases, this mask may be a constant
40011 that we can discover with ud-chains, in which case the above
40012 transformation is ok. However, the common usage here is for the
40013 mask to be produced by an UNSPEC_LVSL, in which case the mask
40014 cannot be known at compile time. In such a case we would have to
40015 generate several instructions to compute M' as above at run time,
40016 and a cost model is needed again.
40018 However, when the mask M for an UNSPEC_VPERM is loaded from the
40019 constant pool, we can replace M with M' as above at no cost
40020 beyond adding a constant pool entry. */
40022 /* This is based on the union-find logic in web.c. web_entry_base is
40023 defined in df.h. */
40024 class swap_web_entry : public web_entry_base
40026 public:
40027 /* Pointer to the insn. */
40028 rtx_insn *insn;
40029 /* Set if insn contains a mention of a vector register. All other
40030 fields are undefined if this field is unset. */
40031 unsigned int is_relevant : 1;
40032 /* Set if insn is a load. */
40033 unsigned int is_load : 1;
40034 /* Set if insn is a store. */
40035 unsigned int is_store : 1;
40036 /* Set if insn is a doubleword swap. This can either be a register swap
40037 or a permuting load or store (test is_load and is_store for this). */
40038 unsigned int is_swap : 1;
40039 /* Set if the insn has a live-in use of a parameter register. */
40040 unsigned int is_live_in : 1;
40041 /* Set if the insn has a live-out def of a return register. */
40042 unsigned int is_live_out : 1;
40043 /* Set if the insn contains a subreg reference of a vector register. */
40044 unsigned int contains_subreg : 1;
40045 /* Set if the insn contains a 128-bit integer operand. */
40046 unsigned int is_128_int : 1;
40047 /* Set if this is a call-insn. */
40048 unsigned int is_call : 1;
40049 /* Set if this insn does not perform a vector operation for which
40050 element order matters, or if we know how to fix it up if it does.
40051 Undefined if is_swap is set. */
40052 unsigned int is_swappable : 1;
40053 /* A nonzero value indicates what kind of special handling for this
40054 insn is required if doublewords are swapped. Undefined if
40055 is_swappable is not set. */
40056 unsigned int special_handling : 4;
40057 /* Set if the web represented by this entry cannot be optimized. */
40058 unsigned int web_not_optimizable : 1;
40059 /* Set if this insn should be deleted. */
40060 unsigned int will_delete : 1;
40063 enum special_handling_values {
40064 SH_NONE = 0,
40065 SH_CONST_VECTOR,
40066 SH_SUBREG,
40067 SH_NOSWAP_LD,
40068 SH_NOSWAP_ST,
40069 SH_EXTRACT,
40070 SH_SPLAT,
40071 SH_XXPERMDI,
40072 SH_CONCAT,
40073 SH_VPERM
40076 /* Union INSN with all insns containing definitions that reach USE.
40077 Detect whether USE is live-in to the current function. */
40078 static void
40079 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
40081 struct df_link *link = DF_REF_CHAIN (use);
40083 if (!link)
40084 insn_entry[INSN_UID (insn)].is_live_in = 1;
40086 while (link)
40088 if (DF_REF_IS_ARTIFICIAL (link->ref))
40089 insn_entry[INSN_UID (insn)].is_live_in = 1;
40091 if (DF_REF_INSN_INFO (link->ref))
40093 rtx def_insn = DF_REF_INSN (link->ref);
40094 (void)unionfind_union (insn_entry + INSN_UID (insn),
40095 insn_entry + INSN_UID (def_insn));
40098 link = link->next;
40102 /* Union INSN with all insns containing uses reached from DEF.
40103 Detect whether DEF is live-out from the current function. */
40104 static void
40105 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
40107 struct df_link *link = DF_REF_CHAIN (def);
40109 if (!link)
40110 insn_entry[INSN_UID (insn)].is_live_out = 1;
40112 while (link)
40114 /* This could be an eh use or some other artificial use;
40115 we treat these all the same (killing the optimization). */
40116 if (DF_REF_IS_ARTIFICIAL (link->ref))
40117 insn_entry[INSN_UID (insn)].is_live_out = 1;
40119 if (DF_REF_INSN_INFO (link->ref))
40121 rtx use_insn = DF_REF_INSN (link->ref);
40122 (void)unionfind_union (insn_entry + INSN_UID (insn),
40123 insn_entry + INSN_UID (use_insn));
40126 link = link->next;
40130 /* Return 1 iff INSN is a load insn, including permuting loads that
40131 represent an lxvd2x instruction; else return 0.  */
40132 static unsigned int
40133 insn_is_load_p (rtx insn)
40135 rtx body = PATTERN (insn);
40137 if (GET_CODE (body) == SET)
40139 if (GET_CODE (SET_SRC (body)) == MEM)
40140 return 1;
40142 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
40143 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
40144 return 1;
40146 return 0;
40149 if (GET_CODE (body) != PARALLEL)
40150 return 0;
40152 rtx set = XVECEXP (body, 0, 0);
40154 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
40155 return 1;
40157 return 0;
40160 /* Return 1 iff INSN is a store insn, including permuting stores that
40161 represent an stxvd2x instruction; else return 0.  */
40162 static unsigned int
40163 insn_is_store_p (rtx insn)
40165 rtx body = PATTERN (insn);
40166 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
40167 return 1;
40168 if (GET_CODE (body) != PARALLEL)
40169 return 0;
40170 rtx set = XVECEXP (body, 0, 0);
40171 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
40172 return 1;
40173 return 0;
40176 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
40177 a permuting load, or a permuting store. */
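/* For example, for V4SI the matched selector is
     (parallel [(const_int 2) (const_int 3)
                (const_int 0) (const_int 1)])
   i.e. the two doublewords (element pairs) are exchanged.  */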
40178 static unsigned int
40179 insn_is_swap_p (rtx insn)
40181 rtx body = PATTERN (insn);
40182 if (GET_CODE (body) != SET)
40183 return 0;
40184 rtx rhs = SET_SRC (body);
40185 if (GET_CODE (rhs) != VEC_SELECT)
40186 return 0;
40187 rtx parallel = XEXP (rhs, 1);
40188 if (GET_CODE (parallel) != PARALLEL)
40189 return 0;
40190 unsigned int len = XVECLEN (parallel, 0);
40191 if (len != 2 && len != 4 && len != 8 && len != 16)
40192 return 0;
40193 for (unsigned int i = 0; i < len / 2; ++i)
40195 rtx op = XVECEXP (parallel, 0, i);
40196 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
40197 return 0;
40199 for (unsigned int i = len / 2; i < len; ++i)
40201 rtx op = XVECEXP (parallel, 0, i);
40202 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
40203 return 0;
40205 return 1;
40208 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
40209 static bool
40210 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
40212 unsigned uid = INSN_UID (insn);
40213 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
40214 return false;
40216 /* Find the unique use in the swap and locate its def. If the def
40217 isn't unique, punt. */
40218 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40219 df_ref use;
40220 FOR_EACH_INSN_INFO_USE (use, insn_info)
40222 struct df_link *def_link = DF_REF_CHAIN (use);
40223 if (!def_link || def_link->next)
40224 return false;
40226 rtx def_insn = DF_REF_INSN (def_link->ref);
40227 unsigned uid2 = INSN_UID (def_insn);
40228 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
40229 return false;
40231 rtx body = PATTERN (def_insn);
40232 if (GET_CODE (body) != SET
40233 || GET_CODE (SET_SRC (body)) != VEC_SELECT
40234 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
40235 return false;
40237 rtx mem = XEXP (SET_SRC (body), 0);
40238 rtx base_reg = XEXP (mem, 0);
40240 df_ref base_use;
40241 insn_info = DF_INSN_INFO_GET (def_insn);
40242 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40244 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40245 continue;
40247 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40248 if (!base_def_link || base_def_link->next)
40249 return false;
40251 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
40252 rtx tocrel_body = PATTERN (tocrel_insn);
40253 rtx base, offset;
40254 if (GET_CODE (tocrel_body) != SET)
40255 return false;
40256 /* There is an extra level of indirection for small/large
40257 code models. */
40258 rtx tocrel_expr = SET_SRC (tocrel_body);
40259 if (GET_CODE (tocrel_expr) == MEM)
40260 tocrel_expr = XEXP (tocrel_expr, 0);
40261 if (!toc_relative_expr_p (tocrel_expr, false))
40262 return false;
40263 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40264 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
40265 return false;
40268 return true;
40271 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
40272 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
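/* Roughly, the shape checked below is (a sketch):
     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       ...)
   with PLUS possibly SMIN or SMAX; both selects read the same
   register with the lanes swapped.  */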
40273 static bool
40274 v2df_reduction_p (rtx op)
40276 if (GET_MODE (op) != V2DFmode)
40277 return false;
40279 enum rtx_code code = GET_CODE (op);
40280 if (code != PLUS && code != SMIN && code != SMAX)
40281 return false;
40283 rtx concat = XEXP (op, 0);
40284 if (GET_CODE (concat) != VEC_CONCAT)
40285 return false;
40287 rtx select0 = XEXP (concat, 0);
40288 rtx select1 = XEXP (concat, 1);
40289 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
40290 return false;
40292 rtx reg0 = XEXP (select0, 0);
40293 rtx reg1 = XEXP (select1, 0);
40294 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
40295 return false;
40297 rtx parallel0 = XEXP (select0, 1);
40298 rtx parallel1 = XEXP (select1, 1);
40299 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
40300 return false;
40302 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
40303 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
40304 return false;
40306 return true;
40309 /* Return 1 iff OP is an operand that will not be affected by having
40310 vector doublewords swapped in memory. */
40311 static unsigned int
40312 rtx_is_swappable_p (rtx op, unsigned int *special)
40314 enum rtx_code code = GET_CODE (op);
40315 int i, j;
40316 rtx parallel;
40318 switch (code)
40320 case LABEL_REF:
40321 case SYMBOL_REF:
40322 case CLOBBER:
40323 case REG:
40324 return 1;
40326 case VEC_CONCAT:
40327 case ASM_INPUT:
40328 case ASM_OPERANDS:
40329 return 0;
40331 case CONST_VECTOR:
40333 *special = SH_CONST_VECTOR;
40334 return 1;
40337 case VEC_DUPLICATE:
40338 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
40339 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
40340 it represents a vector splat for which we can do special
40341 handling. */
40342 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
40343 return 1;
40344 else if (REG_P (XEXP (op, 0))
40345 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40346 /* This catches V2DF and V2DI splat, at a minimum. */
40347 return 1;
40348 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
40349 && REG_P (XEXP (XEXP (op, 0), 0))
40350 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40351 /* This catches splat of a truncated value. */
40352 return 1;
40353 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
40354 /* If the duplicated item is from a select, defer to the select
40355 processing to see if we can change the lane for the splat. */
40356 return rtx_is_swappable_p (XEXP (op, 0), special);
40357 else
40358 return 0;
40360 case VEC_SELECT:
40361 /* A vec_extract operation is ok if we change the lane. */
40362 if (GET_CODE (XEXP (op, 0)) == REG
40363 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
40364 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40365 && XVECLEN (parallel, 0) == 1
40366 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
40368 *special = SH_EXTRACT;
40369 return 1;
40371 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
40372 XXPERMDI is a swap operation, it will be identified by
40373 insn_is_swap_p and therefore we won't get here. */
40374 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
40375 && (GET_MODE (XEXP (op, 0)) == V4DFmode
40376 || GET_MODE (XEXP (op, 0)) == V4DImode)
40377 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40378 && XVECLEN (parallel, 0) == 2
40379 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
40380 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
40382 *special = SH_XXPERMDI;
40383 return 1;
40385 else if (v2df_reduction_p (op))
40386 return 1;
40387 else
40388 return 0;
40390 case UNSPEC:
40392 /* Various operations are unsafe for this optimization, at least
40393 without significant additional work. Permutes are obviously
40394 problematic, as both the permute control vector and the ordering
40395 of the target values are invalidated by doubleword swapping.
40396 Vector pack and unpack modify the number of vector lanes.
40397 Merge-high/low will not operate correctly on swapped operands.
40398 Vector shifts across element boundaries are clearly uncool,
40399 as are vector select and concatenate operations. Vector
40400 sum-across instructions define one operand with a specific
40401 order-dependent element, so additional fixup code would be
40402 needed to make those work. Vector set and non-immediate-form
40403 vector splat are element-order sensitive. A few of these
40404 cases might be workable with special handling if required.
40405 Adding cost modeling would be appropriate in some cases. */
40406 int val = XINT (op, 1);
40407 switch (val)
40409 default:
40410 break;
40411 case UNSPEC_VMRGH_DIRECT:
40412 case UNSPEC_VMRGL_DIRECT:
40413 case UNSPEC_VPACK_SIGN_SIGN_SAT:
40414 case UNSPEC_VPACK_SIGN_UNS_SAT:
40415 case UNSPEC_VPACK_UNS_UNS_MOD:
40416 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
40417 case UNSPEC_VPACK_UNS_UNS_SAT:
40418 case UNSPEC_VPERM:
40419 case UNSPEC_VPERM_UNS:
40420 case UNSPEC_VPERMHI:
40421 case UNSPEC_VPERMSI:
40422 case UNSPEC_VPKPX:
40423 case UNSPEC_VSLDOI:
40424 case UNSPEC_VSLO:
40425 case UNSPEC_VSRO:
40426 case UNSPEC_VSUM2SWS:
40427 case UNSPEC_VSUM4S:
40428 case UNSPEC_VSUM4UBS:
40429 case UNSPEC_VSUMSWS:
40430 case UNSPEC_VSUMSWS_DIRECT:
40431 case UNSPEC_VSX_CONCAT:
40432 case UNSPEC_VSX_SET:
40433 case UNSPEC_VSX_SLDWI:
40434 case UNSPEC_VUNPACK_HI_SIGN:
40435 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
40436 case UNSPEC_VUNPACK_LO_SIGN:
40437 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
40438 case UNSPEC_VUPKHPX:
40439 case UNSPEC_VUPKHS_V4SF:
40440 case UNSPEC_VUPKHU_V4SF:
40441 case UNSPEC_VUPKLPX:
40442 case UNSPEC_VUPKLS_V4SF:
40443 case UNSPEC_VUPKLU_V4SF:
40444 case UNSPEC_VSX_CVDPSPN:
40445 case UNSPEC_VSX_CVSPDP:
40446 case UNSPEC_VSX_CVSPDPN:
40447 case UNSPEC_VSX_EXTRACT:
40448 case UNSPEC_VSX_VSLO:
40449 case UNSPEC_VSX_VEC_INIT:
40450 return 0;
40451 case UNSPEC_VSPLT_DIRECT:
40452 *special = SH_SPLAT;
40453 return 1;
40454 case UNSPEC_REDUC_PLUS:
40455 case UNSPEC_REDUC:
40456 return 1;
40460 default:
40461 break;
40464 const char *fmt = GET_RTX_FORMAT (code);
40465 int ok = 1;
40467 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40468 if (fmt[i] == 'e' || fmt[i] == 'u')
40470 unsigned int special_op = SH_NONE;
40471 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
40472 if (special_op == SH_NONE)
40473 continue;
40474 /* Ensure we never have two kinds of special handling
40475 for the same insn. */
40476 if (*special != SH_NONE && *special != special_op)
40477 return 0;
40478 *special = special_op;
40480 else if (fmt[i] == 'E')
40481 for (j = 0; j < XVECLEN (op, i); ++j)
40483 unsigned int special_op = SH_NONE;
40484 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
40485 if (special_op == SH_NONE)
40486 continue;
40487 /* Ensure we never have two kinds of special handling
40488 for the same insn. */
40489 if (*special != SH_NONE && *special != special_op)
40490 return 0;
40491 *special = special_op;
40494 return ok;
40497 /* Return 1 iff INSN is an operand that will not be affected by
40498 having vector doublewords swapped in memory (in which case
40499 *SPECIAL is unchanged), or that can be modified to be correct
40500 if vector doublewords are swapped in memory (in which case
40501 *SPECIAL is changed to a value indicating how). */
40502 static unsigned int
40503 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
40504 unsigned int *special)
40506 /* Calls are always bad. */
40507 if (GET_CODE (insn) == CALL_INSN)
40508 return 0;
40510 /* Loads and stores seen here are not permuting, but we can still
40511 fix them up by converting them to permuting ones. Exceptions:
40512 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
40513 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
40514 for the SET source. Also we must now make an exception for lvx
40515 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
40516 explicit "& -16") since this leads to unrecognizable insns. */
40517 rtx body = PATTERN (insn);
40518 int i = INSN_UID (insn);
40520 if (insn_entry[i].is_load)
40522 if (GET_CODE (body) == SET)
40524 rtx rhs = SET_SRC (body);
40525 gcc_assert (GET_CODE (rhs) == MEM);
40526 if (GET_CODE (XEXP (rhs, 0)) == AND)
40527 return 0;
40529 *special = SH_NOSWAP_LD;
40530 return 1;
40532 else
40533 return 0;
40536 if (insn_entry[i].is_store)
40538 if (GET_CODE (body) == SET
40539 && GET_CODE (SET_SRC (body)) != UNSPEC)
40541 rtx lhs = SET_DEST (body);
40542 gcc_assert (GET_CODE (lhs) == MEM);
40543 if (GET_CODE (XEXP (lhs, 0)) == AND)
40544 return 0;
40546 *special = SH_NOSWAP_ST;
40547 return 1;
40549 else
40550 return 0;
40553 /* A convert to single precision can be left as is provided that
40554 all of its uses are in xxspltw instructions that splat BE element
40555 zero. */
40556 if (GET_CODE (body) == SET
40557 && GET_CODE (SET_SRC (body)) == UNSPEC
40558 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
40560 df_ref def;
40561 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40563 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40565 struct df_link *link = DF_REF_CHAIN (def);
40566 if (!link)
40567 return 0;
40569 for (; link; link = link->next) {
40570 rtx use_insn = DF_REF_INSN (link->ref);
40571 rtx use_body = PATTERN (use_insn);
40572 if (GET_CODE (use_body) != SET
40573 || GET_CODE (SET_SRC (use_body)) != UNSPEC
40574 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
40575 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
40576 return 0;
40580 return 1;
40583 /* A concatenation of two doublewords is ok if we reverse the
40584 order of the inputs. */
40585 if (GET_CODE (body) == SET
40586 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
40587 && (GET_MODE (SET_SRC (body)) == V2DFmode
40588 || GET_MODE (SET_SRC (body)) == V2DImode))
40590 *special = SH_CONCAT;
40591 return 1;
40594 /* V2DF reductions are always swappable. */
40595 if (GET_CODE (body) == PARALLEL)
40597 rtx expr = XVECEXP (body, 0, 0);
40598 if (GET_CODE (expr) == SET
40599 && v2df_reduction_p (SET_SRC (expr)))
40600 return 1;
40603 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
40604 constant pool. */
40605 if (GET_CODE (body) == SET
40606 && GET_CODE (SET_SRC (body)) == UNSPEC
40607 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
40608 && XVECLEN (SET_SRC (body), 0) == 3
40609 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
40611 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
40612 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40613 df_ref use;
40614 FOR_EACH_INSN_INFO_USE (use, insn_info)
40615 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40617 struct df_link *def_link = DF_REF_CHAIN (use);
40618 /* Punt if multiple definitions for this reg. */
40619 if (def_link && !def_link->next
40620 && const_load_sequence_p (insn_entry,
40621 DF_REF_INSN (def_link->ref)))
40623 *special = SH_VPERM;
40624 return 1;
40629 /* Otherwise check the operands for vector lane violations. */
40630 return rtx_is_swappable_p (body, special);
40633 enum chain_purpose { FOR_LOADS, FOR_STORES };
40635 /* Return true if the UD or DU chain headed by LINK is non-empty,
40636 and every entry on the chain references an insn that is a
40637 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
40638 register swap must have only permuting loads as reaching defs.
40639 If PURPOSE is FOR_STORES, each such register swap must have only
40640 register swaps or permuting stores as reached uses. */
40641 static bool
40642 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
40643 enum chain_purpose purpose)
40645 if (!link)
40646 return false;
40648 for (; link; link = link->next)
40650 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
40651 continue;
40653 if (DF_REF_IS_ARTIFICIAL (link->ref))
40654 return false;
40656 rtx reached_insn = DF_REF_INSN (link->ref);
40657 unsigned uid = INSN_UID (reached_insn);
40658 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
40660 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
40661 || insn_entry[uid].is_store)
40662 return false;
40664 if (purpose == FOR_LOADS)
40666 df_ref use;
40667 FOR_EACH_INSN_INFO_USE (use, insn_info)
40669 struct df_link *swap_link = DF_REF_CHAIN (use);
40671 while (swap_link)
40673 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40674 return false;
40676 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
40677 unsigned uid2 = INSN_UID (swap_def_insn);
40679 /* Only permuting loads are allowed. */
40680 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
40681 return false;
40683 swap_link = swap_link->next;
40687 else if (purpose == FOR_STORES)
40689 df_ref def;
40690 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40692 struct df_link *swap_link = DF_REF_CHAIN (def);
40694 while (swap_link)
40696 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40697 return false;
40699 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
40700 unsigned uid2 = INSN_UID (swap_use_insn);
40702 /* Permuting stores or register swaps are allowed. */
40703 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
40704 return false;
40706 swap_link = swap_link->next;
40712 return true;
40715 /* Mark the xxswapdi instructions associated with permuting loads and
40716 stores for removal. Note that we only flag them for deletion here,
40717 as there is a possibility of a swap being reached from multiple
40718 loads, etc. */
40719 static void
40720 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
40722 rtx insn = insn_entry[i].insn;
40723 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40725 if (insn_entry[i].is_load)
40727 df_ref def;
40728 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40730 struct df_link *link = DF_REF_CHAIN (def);
40732 /* We know by now that these are swaps, so we can delete
40733 them confidently. */
40734 while (link)
40736 rtx use_insn = DF_REF_INSN (link->ref);
40737 insn_entry[INSN_UID (use_insn)].will_delete = 1;
40738 link = link->next;
40742 else if (insn_entry[i].is_store)
40744 df_ref use;
40745 FOR_EACH_INSN_INFO_USE (use, insn_info)
40747 /* Ignore uses for addressability. */
40748 machine_mode mode = GET_MODE (DF_REF_REG (use));
40749 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
40750 continue;
40752 struct df_link *link = DF_REF_CHAIN (use);
40754 /* We know by now that these are swaps, so we can delete
40755 them confidently. */
40756 while (link)
40758 rtx def_insn = DF_REF_INSN (link->ref);
40759 insn_entry[INSN_UID (def_insn)].will_delete = 1;
40760 link = link->next;
40766 /* OP is either a CONST_VECTOR or an expression containing one.
40767 If OP is a CONST_VECTOR, swap its first half with its second;
40768 otherwise recurse into OP to find and swap the CONST_VECTOR. */
40769 static void
40770 swap_const_vector_halves (rtx op)
40772 int i;
40773 enum rtx_code code = GET_CODE (op);
40774 if (GET_CODE (op) == CONST_VECTOR)
40776 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
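/* Exchange element I with element I + half_units, swapping the two
doubleword halves of the constant vector in place. */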
40777 for (i = 0; i < half_units; ++i)
40779 rtx temp = CONST_VECTOR_ELT (op, i);
40780 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
40781 CONST_VECTOR_ELT (op, i + half_units) = temp;
40784 else
40786 int j;
40787 const char *fmt = GET_RTX_FORMAT (code);
40788 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40789 if (fmt[i] == 'e' || fmt[i] == 'u')
40790 swap_const_vector_halves (XEXP (op, i));
40791 else if (fmt[i] == 'E')
40792 for (j = 0; j < XVECLEN (op, i); ++j)
40793 swap_const_vector_halves (XVECEXP (op, i, j));
40797 /* Find all subregs of a vector expression that perform a narrowing,
40798 and adjust the subreg index to account for doubleword swapping. */
40799 static void
40800 adjust_subreg_index (rtx op)
40802 enum rtx_code code = GET_CODE (op);
40803 if (code == SUBREG
40804 && (GET_MODE_SIZE (GET_MODE (op))
40805 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
40807 unsigned int index = SUBREG_BYTE (op);
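/* A narrowing subreg selects part of the wider register; once the
doublewords are swapped, that part is found 8 bytes away in the
other half, so shift the byte offset accordingly. */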
40808 if (index < 8)
40809 index += 8;
40810 else
40811 index -= 8;
40812 SUBREG_BYTE (op) = index;
40815 const char *fmt = GET_RTX_FORMAT (code);
40816 int i,j;
40817 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40818 if (fmt[i] == 'e' || fmt[i] == 'u')
40819 adjust_subreg_index (XEXP (op, i));
40820 else if (fmt[i] == 'E')
40821 for (j = 0; j < XVECLEN (op, i); ++j)
40822 adjust_subreg_index (XVECEXP (op, i, j));
40825 /* Convert the non-permuting load INSN to a permuting one. */
40826 static void
40827 permute_load (rtx_insn *insn)
40829 rtx body = PATTERN (insn);
40830 rtx mem_op = SET_SRC (body);
40831 rtx tgt_reg = SET_DEST (body);
40832 machine_mode mode = GET_MODE (tgt_reg);
40833 int n_elts = GET_MODE_NUNITS (mode);
40834 int half_elts = n_elts / 2;
40835 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40836 int i, j;
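/* Build a selector that swaps the two halves of the vector, making
this a permuting load: output element I takes input element
(I + half_elts) mod n_elts of the memory operand. */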
40837 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40838 XVECEXP (par, 0, i) = GEN_INT (j);
40839 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40840 XVECEXP (par, 0, i) = GEN_INT (j);
40841 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
40842 SET_SRC (body) = sel;
40843 INSN_CODE (insn) = -1; /* Force re-recognition. */
40844 df_insn_rescan (insn);
40846 if (dump_file)
40847 fprintf (dump_file, "Replacing load %d with permuted load\n",
40848 INSN_UID (insn));
40851 /* Convert the non-permuting store INSN to a permuting one. */
40852 static void
40853 permute_store (rtx_insn *insn)
40855 rtx body = PATTERN (insn);
40856 rtx src_reg = SET_SRC (body);
40857 machine_mode mode = GET_MODE (src_reg);
40858 int n_elts = GET_MODE_NUNITS (mode);
40859 int half_elts = n_elts / 2;
40860 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40861 int i, j;
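/* As in permute_load, build the half-swapping selector
[half_elts .. n_elts-1, 0 .. half_elts-1] for the source value. */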
40862 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40863 XVECEXP (par, 0, i) = GEN_INT (j);
40864 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40865 XVECEXP (par, 0, i) = GEN_INT (j);
40866 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
40867 SET_SRC (body) = sel;
40868 INSN_CODE (insn) = -1; /* Force re-recognition. */
40869 df_insn_rescan (insn);
40871 if (dump_file)
40872 fprintf (dump_file, "Replacing store %d with permuted store\n",
40873 INSN_UID (insn));
40876 /* Given INSN that contains a vector extract operation, adjust the
40877 index of the extracted lane to account for the doubleword swap. */
40878 static void
40879 adjust_extract (rtx_insn *insn)
40881 rtx pattern = PATTERN (insn);
40882 if (GET_CODE (pattern) == PARALLEL)
40883 pattern = XVECEXP (pattern, 0, 0);
40884 rtx src = SET_SRC (pattern);
40885 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40886 account for that. */
40887 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
40888 rtx par = XEXP (sel, 1);
40889 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
40890 int lane = INTVAL (XVECEXP (par, 0, 0));
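/* Move the selected lane into the other doubleword: lanes in the
first half of the vector map to the second half and vice versa. */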
40891 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40892 XVECEXP (par, 0, 0) = GEN_INT (lane);
40893 INSN_CODE (insn) = -1; /* Force re-recognition. */
40894 df_insn_rescan (insn);
40896 if (dump_file)
40897 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
40900 /* Given INSN that contains a vector direct-splat operation, adjust
40901 the index of the source lane to account for the doubleword swap. */
40902 static void
40903 adjust_splat (rtx_insn *insn)
40905 rtx body = PATTERN (insn);
40906 rtx unspec = XEXP (body, 1);
40907 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
40908 int lane = INTVAL (XVECEXP (unspec, 0, 1));
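/* As with extracts, the source lane for the splat moves to the
other doubleword. */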
40909 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40910 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
40911 INSN_CODE (insn) = -1; /* Force re-recognition. */
40912 df_insn_rescan (insn);
40914 if (dump_file)
40915 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
40918 /* Given INSN that contains an XXPERMDI operation (that is not a
40919 doubleword swap), reverse the order of the source operands and adjust
40920 the indices of the source lanes to account for doubleword reversal. */
40921 static void
40922 adjust_xxpermdi (rtx_insn *insn)
40924 rtx set = PATTERN (insn);
40925 rtx select = XEXP (set, 1);
40926 rtx concat = XEXP (select, 0);
40927 rtx src0 = XEXP (concat, 0);
40928 XEXP (concat, 0) = XEXP (concat, 1);
40929 XEXP (concat, 1) = src0;
40930 rtx parallel = XEXP (select, 1);
40931 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
40932 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
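/* Both inputs have had their doublewords swapped and their order
reversed, so doubleword L of the concatenation now holds what
doubleword 3 - L held before; the two selected output lanes also
trade places, so each new lane is computed from the other. */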
40933 int new_lane0 = 3 - lane1;
40934 int new_lane1 = 3 - lane0;
40935 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
40936 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
40937 INSN_CODE (insn) = -1; /* Force re-recognition. */
40938 df_insn_rescan (insn);
40940 if (dump_file)
40941 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
40944 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
40945 reverse the order of those inputs. */
40946 static void
40947 adjust_concat (rtx_insn *insn)
40949 rtx set = PATTERN (insn);
40950 rtx concat = XEXP (set, 1);
40951 rtx src0 = XEXP (concat, 0);
40952 XEXP (concat, 0) = XEXP (concat, 1);
40953 XEXP (concat, 1) = src0;
40954 INSN_CODE (insn) = -1; /* Force re-recognition. */
40955 df_insn_rescan (insn);
40957 if (dump_file)
40958 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
40961 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40962 constant pool to reflect swapped doublewords. */
40963 static void
40964 adjust_vperm (rtx_insn *insn)
40966 /* We previously determined that the UNSPEC_VPERM was fed by a
40967 swap of a swapping load of a TOC-relative constant pool symbol.
40968 Find the MEM in the swapping load and replace it with a MEM for
40969 the adjusted mask constant. */
40970 rtx set = PATTERN (insn);
40971 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
40973 /* Find the swap. */
40974 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40975 df_ref use;
40976 rtx_insn *swap_insn = 0;
40977 FOR_EACH_INSN_INFO_USE (use, insn_info)
40978 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40980 struct df_link *def_link = DF_REF_CHAIN (use);
40981 gcc_assert (def_link && !def_link->next);
40982 swap_insn = DF_REF_INSN (def_link->ref);
40983 break;
40985 gcc_assert (swap_insn);
40987 /* Find the load. */
40988 insn_info = DF_INSN_INFO_GET (swap_insn);
40989 rtx_insn *load_insn = 0;
40990 FOR_EACH_INSN_INFO_USE (use, insn_info)
40992 struct df_link *def_link = DF_REF_CHAIN (use);
40993 gcc_assert (def_link && !def_link->next);
40994 load_insn = DF_REF_INSN (def_link->ref);
40995 break;
40997 gcc_assert (load_insn);
40999 /* Find the TOC-relative symbol access. */
41000 insn_info = DF_INSN_INFO_GET (load_insn);
41001 rtx_insn *tocrel_insn = 0;
41002 FOR_EACH_INSN_INFO_USE (use, insn_info)
41004 struct df_link *def_link = DF_REF_CHAIN (use);
41005 gcc_assert (def_link && !def_link->next);
41006 tocrel_insn = DF_REF_INSN (def_link->ref);
41007 break;
41009 gcc_assert (tocrel_insn);
41011 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
41012 to set tocrel_base; otherwise it would be unnecessary as we've
41013 already established it will return true. */
41014 rtx base, offset;
41015 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
41016 /* There is an extra level of indirection for small/large code models. */
41017 if (GET_CODE (tocrel_expr) == MEM)
41018 tocrel_expr = XEXP (tocrel_expr, 0);
41019 if (!toc_relative_expr_p (tocrel_expr, false))
41020 gcc_unreachable ();
41021 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
41022 rtx const_vector = get_pool_constant (base);
41023 /* With the extra indirection, get_pool_constant will produce the
41024 real constant from the reg_equal expression, so get the real
41025 constant. */
41026 if (GET_CODE (const_vector) == SYMBOL_REF)
41027 const_vector = get_pool_constant (const_vector);
41028 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
41030 /* Create an adjusted mask from the initial mask. */
41031 unsigned int new_mask[16], i, val;
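/* Each mask byte selects one byte of the 32-byte concatenation of
the two inputs. With doublewords swapped, byte B of an input is
found at (B + 8) mod 16 within that input, so remap each selector;
e.g., 0 becomes 8, 15 becomes 7, and 23 becomes 31. */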
41032 for (i = 0; i < 16; ++i) {
41033 val = INTVAL (XVECEXP (const_vector, 0, i));
41034 if (val < 16)
41035 new_mask[i] = (val + 8) % 16;
41036 else
41037 new_mask[i] = ((val + 8) % 16) + 16;
41040 /* Create a new CONST_VECTOR and a MEM that references it. */
41041 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
41042 for (i = 0; i < 16; ++i)
41043 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
41044 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
41045 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
41046 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
41047 can't recognize. Force the SYMBOL_REF into a register. */
41048 if (!REG_P (XEXP (new_mem, 0))) {
41049 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
41050 XEXP (new_mem, 0) = base_reg;
41051 /* Move the newly created insn ahead of the load insn. */
41052 rtx_insn *force_insn = get_last_insn ();
41053 remove_insn (force_insn);
41054 rtx_insn *before_load_insn = PREV_INSN (load_insn);
41055 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
41056 df_insn_rescan (before_load_insn);
41057 df_insn_rescan (force_insn);
41060 /* Replace the MEM in the load instruction and rescan it. */
41061 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
41062 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
41063 df_insn_rescan (load_insn);
41065 if (dump_file)
41066 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
41069 /* The insn described by INSN_ENTRY[I] can be swapped, but only
41070 with special handling. Take care of that here. */
41071 static void
41072 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
41074 rtx_insn *insn = insn_entry[i].insn;
41075 rtx body = PATTERN (insn);
41077 switch (insn_entry[i].special_handling)
41079 default:
41080 gcc_unreachable ();
41081 case SH_CONST_VECTOR:
41083 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
41084 gcc_assert (GET_CODE (body) == SET);
41085 rtx rhs = SET_SRC (body);
41086 swap_const_vector_halves (rhs);
41087 if (dump_file)
41088 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
41089 break;
41091 case SH_SUBREG:
41092 /* A subreg of the same size is already safe. For subregs that
41093 select a smaller portion of a reg, adjust the index for
41094 swapped doublewords. */
41095 adjust_subreg_index (body);
41096 if (dump_file)
41097 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
41098 break;
41099 case SH_NOSWAP_LD:
41100 /* Convert a non-permuting load to a permuting one. */
41101 permute_load (insn);
41102 break;
41103 case SH_NOSWAP_ST:
41104 /* Convert a non-permuting store to a permuting one. */
41105 permute_store (insn);
41106 break;
41107 case SH_EXTRACT:
41108 /* Change the lane on an extract operation. */
41109 adjust_extract (insn);
41110 break;
41111 case SH_SPLAT:
41112 /* Change the lane on a direct-splat operation. */
41113 adjust_splat (insn);
41114 break;
41115 case SH_XXPERMDI:
41116 /* Change the lanes on an XXPERMDI operation. */
41117 adjust_xxpermdi (insn);
41118 break;
41119 case SH_CONCAT:
41120 /* Reverse the order of a concatenation operation. */
41121 adjust_concat (insn);
41122 break;
41123 case SH_VPERM:
41124 /* Change the mask loaded from the constant pool for a VPERM. */
41125 adjust_vperm (insn);
41126 break;
41130 /* Find the insn from the Ith table entry, which is known to be a
41131 register swap Y = SWAP(X). Replace it with a copy Y = X. */
41132 static void
41133 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
41135 rtx_insn *insn = insn_entry[i].insn;
41136 rtx body = PATTERN (insn);
41137 rtx src_reg = XEXP (SET_SRC (body), 0);
41138 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
41139 rtx_insn *new_insn = emit_insn_before (copy, insn);
41140 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
41141 df_insn_rescan (new_insn);
41143 if (dump_file)
41145 unsigned int new_uid = INSN_UID (new_insn);
41146 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
41149 df_insn_delete (insn);
41150 remove_insn (insn);
41151 insn->set_deleted ();
41154 /* Dump the swap table to DUMP_FILE. */
41155 static void
41156 dump_swap_insn_table (swap_web_entry *insn_entry)
41158 int e = get_max_uid ();
41159 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
41161 for (int i = 0; i < e; ++i)
41162 if (insn_entry[i].is_relevant)
41164 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
41165 fprintf (dump_file, "%6d %6d ", i,
41166 pred_entry && pred_entry->insn
41167 ? INSN_UID (pred_entry->insn) : 0);
41168 if (insn_entry[i].is_load)
41169 fputs ("load ", dump_file);
41170 if (insn_entry[i].is_store)
41171 fputs ("store ", dump_file);
41172 if (insn_entry[i].is_swap)
41173 fputs ("swap ", dump_file);
41174 if (insn_entry[i].is_live_in)
41175 fputs ("live-in ", dump_file);
41176 if (insn_entry[i].is_live_out)
41177 fputs ("live-out ", dump_file);
41178 if (insn_entry[i].contains_subreg)
41179 fputs ("subreg ", dump_file);
41180 if (insn_entry[i].is_128_int)
41181 fputs ("int128 ", dump_file);
41182 if (insn_entry[i].is_call)
41183 fputs ("call ", dump_file);
41184 if (insn_entry[i].is_swappable)
41186 fputs ("swappable ", dump_file);
41187 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
41188 fputs ("special:constvec ", dump_file);
41189 else if (insn_entry[i].special_handling == SH_SUBREG)
41190 fputs ("special:subreg ", dump_file);
41191 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
41192 fputs ("special:load ", dump_file);
41193 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
41194 fputs ("special:store ", dump_file);
41195 else if (insn_entry[i].special_handling == SH_EXTRACT)
41196 fputs ("special:extract ", dump_file);
41197 else if (insn_entry[i].special_handling == SH_SPLAT)
41198 fputs ("special:splat ", dump_file);
41199 else if (insn_entry[i].special_handling == SH_XXPERMDI)
41200 fputs ("special:xxpermdi ", dump_file);
41201 else if (insn_entry[i].special_handling == SH_CONCAT)
41202 fputs ("special:concat ", dump_file);
41203 else if (insn_entry[i].special_handling == SH_VPERM)
41204 fputs ("special:vperm ", dump_file);
41206 if (insn_entry[i].web_not_optimizable)
41207 fputs ("unoptimizable ", dump_file);
41208 if (insn_entry[i].will_delete)
41209 fputs ("delete ", dump_file);
41210 fputs ("\n", dump_file);
41212 fputs ("\n", dump_file);
41215 /* Return ALIGN with its address canonicalized to (reg) or
41216 (plus reg reg). Here ALIGN is an (and addr (const_int -16)).
41217 Always return a new copy to avoid problems with combine. */
41218 static rtx
41219 alignment_with_canonical_addr (rtx align)
41221 rtx canon;
41222 rtx addr = XEXP (align, 0);
41224 if (REG_P (addr))
41225 canon = addr;
41227 else if (GET_CODE (addr) == PLUS)
41229 rtx addrop0 = XEXP (addr, 0);
41230 rtx addrop1 = XEXP (addr, 1);
41232 if (!REG_P (addrop0))
41233 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
41235 if (!REG_P (addrop1))
41236 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
41238 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
41241 else
41242 canon = force_reg (GET_MODE (addr), addr);
41244 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
41247 /* Check whether an rtx is an alignment mask, and if so, return
41248 a fully-expanded rtx for the masking operation. */
41249 static rtx
41250 alignment_mask (rtx_insn *insn)
41252 rtx body = PATTERN (insn);
41254 if (GET_CODE (body) != SET
41255 || GET_CODE (SET_SRC (body)) != AND
41256 || !REG_P (XEXP (SET_SRC (body), 0)))
41257 return 0;
41259 rtx mask = XEXP (SET_SRC (body), 1);
41261 if (GET_CODE (mask) == CONST_INT)
41263 if (INTVAL (mask) == -16)
41264 return alignment_with_canonical_addr (SET_SRC (body));
41265 else
41266 return 0;
41269 if (!REG_P (mask))
41270 return 0;
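/* The mask is in a register; walk the use-def chain to verify that
its single reaching definition sets it to the constant -16. */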
41272 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41273 df_ref use;
41274 rtx real_mask = 0;
41276 FOR_EACH_INSN_INFO_USE (use, insn_info)
41278 if (!rtx_equal_p (DF_REF_REG (use), mask))
41279 continue;
41281 struct df_link *def_link = DF_REF_CHAIN (use);
41282 if (!def_link || def_link->next)
41283 return 0;
41285 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
41286 rtx const_body = PATTERN (const_insn);
41287 if (GET_CODE (const_body) != SET)
41288 return 0;
41290 real_mask = SET_SRC (const_body);
41292 if (GET_CODE (real_mask) != CONST_INT
41293 || INTVAL (real_mask) != -16)
41294 return 0;
41297 if (real_mask == 0)
41298 return 0;
41300 return alignment_with_canonical_addr (SET_SRC (body));
41303 /* Given INSN that's a load or store based at BASE_REG, look for a
41304 feeding computation that aligns its address on a 16-byte boundary. */
41305 static rtx
41306 find_alignment_op (rtx_insn *insn, rtx base_reg)
41308 df_ref base_use;
41309 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41310 rtx and_operation = 0;
41312 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
41314 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
41315 continue;
41317 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
41318 if (!base_def_link || base_def_link->next)
41319 break;
41321 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
41322 and_operation = alignment_mask (and_insn);
41323 if (and_operation != 0)
41324 break;
41327 return and_operation;
41330 struct del_info { bool replace; rtx_insn *replace_insn; };
41332 /* If INSN is the load for an lvx pattern, put it in canonical form. */
41333 static void
41334 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
41336 rtx body = PATTERN (insn);
41337 gcc_assert (GET_CODE (body) == SET
41338 && GET_CODE (SET_SRC (body)) == VEC_SELECT
41339 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
41341 rtx mem = XEXP (SET_SRC (body), 0);
41342 rtx base_reg = XEXP (mem, 0);
41344 rtx and_operation = find_alignment_op (insn, base_reg);
41346 if (and_operation != 0)
41348 df_ref def;
41349 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41350 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41352 struct df_link *link = DF_REF_CHAIN (def);
41353 if (!link || link->next)
41354 break;
41356 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
41357 if (!insn_is_swap_p (swap_insn)
41358 || insn_is_load_p (swap_insn)
41359 || insn_is_store_p (swap_insn))
41360 break;
41362 /* Expected lvx pattern found. Change the swap to
41363 a copy, and propagate the AND operation into the
41364 load. */
41365 to_delete[INSN_UID (swap_insn)].replace = true;
41366 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
41368 XEXP (mem, 0) = and_operation;
41369 SET_SRC (body) = mem;
41370 INSN_CODE (insn) = -1; /* Force re-recognition. */
41371 df_insn_rescan (insn);
41373 if (dump_file)
41374 fprintf (dump_file, "lvx opportunity found at %d\n",
41375 INSN_UID (insn));
41380 /* If INSN is the store for an stvx pattern, put it in canonical form. */
41381 static void
41382 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
41384 rtx body = PATTERN (insn);
41385 gcc_assert (GET_CODE (body) == SET
41386 && GET_CODE (SET_DEST (body)) == MEM
41387 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
41388 rtx mem = SET_DEST (body);
41389 rtx base_reg = XEXP (mem, 0);
41391 rtx and_operation = find_alignment_op (insn, base_reg);
41393 if (and_operation != 0)
41395 rtx src_reg = XEXP (SET_SRC (body), 0);
41396 df_ref src_use;
41397 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41398 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
41400 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
41401 continue;
41403 struct df_link *link = DF_REF_CHAIN (src_use);
41404 if (!link || link->next)
41405 break;
41407 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
41408 if (!insn_is_swap_p (swap_insn)
41409 || insn_is_load_p (swap_insn)
41410 || insn_is_store_p (swap_insn))
41411 break;
41413 /* Expected stvx pattern found. Change the swap to
41414 a copy, and propagate the AND operation into the
41415 store. */
41416 to_delete[INSN_UID (swap_insn)].replace = true;
41417 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
41419 XEXP (mem, 0) = and_operation;
41420 SET_SRC (body) = src_reg;
41421 INSN_CODE (insn) = -1; /* Force re-recognition. */
41422 df_insn_rescan (insn);
41424 if (dump_file)
41425 fprintf (dump_file, "stvx opportunity found at %d\n",
41426 INSN_UID (insn));
41431 /* Look for patterns created from builtin lvx and stvx calls, and
41432 canonicalize them to be properly recognized as such. */
41433 static void
41434 recombine_lvx_stvx_patterns (function *fun)
41436 int i;
41437 basic_block bb;
41438 rtx_insn *insn;
41440 int num_insns = get_max_uid ();
41441 del_info *to_delete = XCNEWVEC (del_info, num_insns);
41443 FOR_ALL_BB_FN (bb, fun)
41444 FOR_BB_INSNS (bb, insn)
41446 if (!NONDEBUG_INSN_P (insn))
41447 continue;
41449 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
41450 recombine_lvx_pattern (insn, to_delete);
41451 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
41452 recombine_stvx_pattern (insn, to_delete);
41455 /* Turning swaps into copies is delayed until now, to avoid problems
41456 with deleting instructions during the insn walk. */
41457 for (i = 0; i < num_insns; i++)
41458 if (to_delete[i].replace)
41460 rtx swap_body = PATTERN (to_delete[i].replace_insn);
41461 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
41462 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
41463 rtx_insn *new_insn = emit_insn_before (copy,
41464 to_delete[i].replace_insn);
41465 set_block_for_insn (new_insn,
41466 BLOCK_FOR_INSN (to_delete[i].replace_insn));
41467 df_insn_rescan (new_insn);
41468 df_insn_delete (to_delete[i].replace_insn);
41469 remove_insn (to_delete[i].replace_insn);
41470 to_delete[i].replace_insn->set_deleted ();
41473 free (to_delete);
41476 /* Main entry point for this pass. */
41477 unsigned int
41478 rs6000_analyze_swaps (function *fun)
41480 swap_web_entry *insn_entry;
41481 basic_block bb;
41482 rtx_insn *insn, *curr_insn = 0;
41484 /* Dataflow analysis for use-def chains. */
41485 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
41486 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
41487 df_analyze ();
41488 df_set_flags (DF_DEFER_INSN_RESCAN);
41490 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
41491 recombine_lvx_stvx_patterns (fun);
41493 /* Allocate structure to represent webs of insns. */
41494 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
41496 /* Walk the insns to gather basic data. */
41497 FOR_ALL_BB_FN (bb, fun)
41498 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
41500 unsigned int uid = INSN_UID (insn);
41501 if (NONDEBUG_INSN_P (insn))
41503 insn_entry[uid].insn = insn;
41505 if (GET_CODE (insn) == CALL_INSN)
41506 insn_entry[uid].is_call = 1;
41508 /* Walk the uses and defs to see if we mention vector regs.
41509 Record any constraints on optimization of such mentions. */
41510 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41511 df_ref mention;
41512 FOR_EACH_INSN_INFO_USE (mention, insn_info)
41514 /* We use DF_REF_REAL_REG here to get inside any subregs. */
41515 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41517 /* If a use gets its value from a call insn, it will be
41518 a hard register and will look like (reg:V4SI 3 3).
41519 The df analysis creates two mentions for GPR3 and GPR4,
41520 both DImode. We must recognize this and treat it as a
41521 vector mention to ensure the call is unioned with this
41522 use. */
41523 if (mode == DImode && DF_REF_INSN_INFO (mention))
41525 rtx feeder = DF_REF_INSN (mention);
41526 /* FIXME: It is pretty hard to get from the df mention
41527 to the mode of the use in the insn. We arbitrarily
41528 pick a vector mode here, even though the use might
41529 be a real DImode. We can be too conservative
41530 (create a web larger than necessary) because of
41531 this, so consider eventually fixing this. */
41532 if (GET_CODE (feeder) == CALL_INSN)
41533 mode = V4SImode;
41536 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41538 insn_entry[uid].is_relevant = 1;
41539 if (mode == TImode || mode == V1TImode
41540 || FLOAT128_VECTOR_P (mode))
41541 insn_entry[uid].is_128_int = 1;
41542 if (DF_REF_INSN_INFO (mention))
41543 insn_entry[uid].contains_subreg
41544 = !rtx_equal_p (DF_REF_REG (mention),
41545 DF_REF_REAL_REG (mention));
41546 union_defs (insn_entry, insn, mention);
41549 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
41551 /* We use DF_REF_REAL_REG here to get inside any subregs. */
41552 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
41554 /* If we're loading up a hard vector register for a call,
41555 it looks like (set (reg:V4SI 9 9) (...)). The df
41556 analysis creates two mentions for GPR9 and GPR10, both
41557 DImode. So relying on the mode from the mentions
41558 isn't sufficient to ensure we union the call into the
41559 web with the parameter setup code. */
41560 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
41561 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
41562 mode = GET_MODE (SET_DEST (PATTERN (insn)));
41564 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
41566 insn_entry[uid].is_relevant = 1;
41567 if (mode == TImode || mode == V1TImode
41568 || FLOAT128_VECTOR_P (mode))
41569 insn_entry[uid].is_128_int = 1;
41570 if (DF_REF_INSN_INFO (mention))
41571 insn_entry[uid].contains_subreg
41572 = !rtx_equal_p (DF_REF_REG (mention),
41573 DF_REF_REAL_REG (mention));
41574 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
41575 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
41576 insn_entry[uid].is_live_out = 1;
41577 union_uses (insn_entry, insn, mention);
41581 if (insn_entry[uid].is_relevant)
41583 /* Determine if this is a load or store. */
41584 insn_entry[uid].is_load = insn_is_load_p (insn);
41585 insn_entry[uid].is_store = insn_is_store_p (insn);
41587 /* Determine if this is a doubleword swap. If not,
41588 determine whether it can legally be swapped. */
41589 if (insn_is_swap_p (insn))
41590 insn_entry[uid].is_swap = 1;
41591 else
41593 unsigned int special = SH_NONE;
41594 insn_entry[uid].is_swappable
41595 = insn_is_swappable_p (insn_entry, insn, &special);
41596 if (special != SH_NONE && insn_entry[uid].contains_subreg)
41597 insn_entry[uid].is_swappable = 0;
41598 else if (special != SH_NONE)
41599 insn_entry[uid].special_handling = special;
41600 else if (insn_entry[uid].contains_subreg)
41601 insn_entry[uid].special_handling = SH_SUBREG;
41607 if (dump_file)
41609 fprintf (dump_file, "\nSwap insn entry table when first built\n");
41610 dump_swap_insn_table (insn_entry);
41613 /* Record unoptimizable webs. */
41614 unsigned e = get_max_uid (), i;
41615 for (i = 0; i < e; ++i)
41617 if (!insn_entry[i].is_relevant)
41618 continue;
41620 swap_web_entry *root
41621 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
41623 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
41624 || (insn_entry[i].contains_subreg
41625 && insn_entry[i].special_handling != SH_SUBREG)
41626 || insn_entry[i].is_128_int || insn_entry[i].is_call
41627 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
41628 root->web_not_optimizable = 1;
41630 /* If we have loads or stores that aren't permuting then the
41631 optimization isn't appropriate. */
41632 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
41633 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
41634 root->web_not_optimizable = 1;
41636 /* If we have permuting loads or stores that are not accompanied
41637 by a register swap, the optimization isn't appropriate. */
41638 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
41640 rtx insn = insn_entry[i].insn;
41641 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41642 df_ref def;
41644 FOR_EACH_INSN_INFO_DEF (def, insn_info)
41646 struct df_link *link = DF_REF_CHAIN (def);
41648 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
41650 root->web_not_optimizable = 1;
41651 break;
41655 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
41657 rtx insn = insn_entry[i].insn;
41658 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41659 df_ref use;
41661 FOR_EACH_INSN_INFO_USE (use, insn_info)
41663 struct df_link *link = DF_REF_CHAIN (use);
41665 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
41667 root->web_not_optimizable = 1;
41668 break;
41674 if (dump_file)
41676 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
41677 dump_swap_insn_table (insn_entry);
41680 /* For each load and store in an optimizable web (which implies
41681 the loads and stores are permuting), find the associated
41682 register swaps and mark them for removal. Due to various
41683 optimizations we may mark the same swap more than once. Also
41684 perform special handling for swappable insns that require it. */
41685 for (i = 0; i < e; ++i)
41686 if ((insn_entry[i].is_load || insn_entry[i].is_store)
41687 && insn_entry[i].is_swap)
41689 swap_web_entry* root_entry
41690 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41691 if (!root_entry->web_not_optimizable)
41692 mark_swaps_for_removal (insn_entry, i);
41694 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
41696 swap_web_entry* root_entry
41697 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
41698 if (!root_entry->web_not_optimizable)
41699 handle_special_swappables (insn_entry, i);
41702 /* Now delete the swaps marked for removal. */
41703 for (i = 0; i < e; ++i)
41704 if (insn_entry[i].will_delete)
41705 replace_swap_with_copy (insn_entry, i);
41707 /* Clean up. */
41708 free (insn_entry);
41709 return 0;
41712 const pass_data pass_data_analyze_swaps =
41714 RTL_PASS, /* type */
41715 "swaps", /* name */
41716 OPTGROUP_NONE, /* optinfo_flags */
41717 TV_NONE, /* tv_id */
41718 0, /* properties_required */
41719 0, /* properties_provided */
41720 0, /* properties_destroyed */
41721 0, /* todo_flags_start */
41722 TODO_df_finish, /* todo_flags_finish */
41725 class pass_analyze_swaps : public rtl_opt_pass
41727 public:
41728 pass_analyze_swaps (gcc::context *ctxt)
41729 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
41732 /* opt_pass methods: */
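/* Run only for little-endian VSX code generation prior to ISA 3.0
(Power9), where vector loads and stores otherwise require the
doubleword swaps this pass works to remove. */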
41733 virtual bool gate (function *)
41735 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
41736 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
41739 virtual unsigned int execute (function *fun)
41741 return rs6000_analyze_swaps (fun);
41744 }; // class pass_analyze_swaps
41746 rtl_opt_pass *
41747 make_pass_analyze_swaps (gcc::context *ctxt)
41749 return new pass_analyze_swaps (ctxt);
41752 #ifdef RS6000_GLIBC_ATOMIC_FENV
41753 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
41754 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
41755 #endif
41757 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
41759 static void
41760 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
41762 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
41764 #ifdef RS6000_GLIBC_ATOMIC_FENV
41765 if (atomic_hold_decl == NULL_TREE)
41767 atomic_hold_decl
41768 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41769 get_identifier ("__atomic_feholdexcept"),
41770 build_function_type_list (void_type_node,
41771 double_ptr_type_node,
41772 NULL_TREE));
41773 TREE_PUBLIC (atomic_hold_decl) = 1;
41774 DECL_EXTERNAL (atomic_hold_decl) = 1;
41777 if (atomic_clear_decl == NULL_TREE)
41779 atomic_clear_decl
41780 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41781 get_identifier ("__atomic_feclearexcept"),
41782 build_function_type_list (void_type_node,
41783 NULL_TREE));
41784 TREE_PUBLIC (atomic_clear_decl) = 1;
41785 DECL_EXTERNAL (atomic_clear_decl) = 1;
41788 tree const_double = build_qualified_type (double_type_node,
41789 TYPE_QUAL_CONST);
41790 tree const_double_ptr = build_pointer_type (const_double);
41791 if (atomic_update_decl == NULL_TREE)
41793 atomic_update_decl
41794 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
41795 get_identifier ("__atomic_feupdateenv"),
41796 build_function_type_list (void_type_node,
41797 const_double_ptr,
41798 NULL_TREE));
41799 TREE_PUBLIC (atomic_update_decl) = 1;
41800 DECL_EXTERNAL (atomic_update_decl) = 1;
41803 tree fenv_var = create_tmp_var_raw (double_type_node);
41804 TREE_ADDRESSABLE (fenv_var) = 1;
41805 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
41807 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
41808 *clear = build_call_expr (atomic_clear_decl, 0);
41809 *update = build_call_expr (atomic_update_decl, 1,
41810 fold_convert (const_double_ptr, fenv_addr));
41811 #endif
41812 return;
41815 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
41816 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
41817 tree call_mffs = build_call_expr (mffs, 0);
41819 /* Generates the equivalent of feholdexcept (&fenv_var)
41821 fenv_var = __builtin_mffs ();
41822 double fenv_hold;
41823 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
41824 __builtin_mtfsf (0xff, fenv_hold); */
41826 /* Mask to clear everything except for the rounding modes and non-IEEE
41827 arithmetic flag. */
41828 const unsigned HOST_WIDE_INT hold_exception_mask =
41829 HOST_WIDE_INT_C (0xffffffff00000007);
41831 tree fenv_var = create_tmp_var_raw (double_type_node);
41833 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
41835 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
41836 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41837 build_int_cst (uint64_type_node,
41838 hold_exception_mask));
41840 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41841 fenv_llu_and);
41843 tree hold_mtfsf = build_call_expr (mtfsf, 2,
41844 build_int_cst (unsigned_type_node, 0xff),
41845 fenv_hold_mtfsf);
41847 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
41849 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
41851 double fenv_clear = __builtin_mffs ();
41852 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
41853 __builtin_mtfsf (0xff, fenv_clear); */
41855 /* Mask to clear the entire lower 32 bits of the FPSCR image,
41856 including the exception bits, enable bits, and rounding modes. */
41857 const unsigned HOST_WIDE_INT clear_exception_mask =
41858 HOST_WIDE_INT_C (0xffffffff00000000);
41860 tree fenv_clear = create_tmp_var_raw (double_type_node);
41862 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
41864 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
41865 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
41866 fenv_clean_llu,
41867 build_int_cst (uint64_type_node,
41868 clear_exception_mask));
41870 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41871 fenv_clear_llu_and);
41873 tree clear_mtfsf = build_call_expr (mtfsf, 2,
41874 build_int_cst (unsigned_type_node, 0xff),
41875 fenv_clear_mtfsf);
41877 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
41879 /* Generates the equivalent of feupdateenv (&fenv_var)
41881 double old_fenv = __builtin_mffs ();
41882 double fenv_update;
41883 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
41884 (*(uint64_t*)&fenv_var & 0x1ff80fff);
41885 __builtin_mtfsf (0xff, fenv_update); */
41887 const unsigned HOST_WIDE_INT update_exception_mask =
41888 HOST_WIDE_INT_C (0xffffffff1fffff00);
41889 const unsigned HOST_WIDE_INT new_exception_mask =
41890 HOST_WIDE_INT_C (0x1ff80fff);
41892 tree old_fenv = create_tmp_var_raw (double_type_node);
41893 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
41895 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
41896 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
41897 build_int_cst (uint64_type_node,
41898 update_exception_mask));
41900 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41901 build_int_cst (uint64_type_node,
41902 new_exception_mask));
41904 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
41905 old_llu_and, new_llu_and);
41907 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41908 new_llu_mask);
41910 tree update_mtfsf = build_call_expr (mtfsf, 2,
41911 build_int_cst (unsigned_type_node, 0xff),
41912 fenv_update_mtfsf);
41914 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
41917 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
41919 static bool
41920 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
41921 optimization_type opt_type)
41923 switch (op)
41925 case rsqrt_optab:
41926 return (opt_type == OPTIMIZE_FOR_SPEED
41927 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
41929 default:
41930 return true;
41934 struct gcc_target targetm = TARGET_INITIALIZER;
41936 #include "gt-rs6000.h"