/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;	/* option name */
  unsigned int mask;	/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,	/* General purpose registers.  */
  RELOAD_REG_FPR,	/* Traditional floating point regs.  */
  RELOAD_REG_VMX,	/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,	/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
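
/* For example, a mode that is valid in a register and supports both
   reg+offset and PRE_INC/PRE_DEC addressing would have an addr_mask of
   RELOAD_REG_VALID | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC, i.e. 0x19.  */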
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	/* INSN to reload for loading.  */
  enum insn_code reload_store;	/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	/* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;		/* Scalar value can go in VMX.  */
  bool fused_toc;		/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}
/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
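
/* Illustrative sketch only, not part of the port: how a caller might use the
   two predicates above to validate an auto-increment address.  The helper
   example_valid_auto_inc_p is hypothetical and kept compiled out.  */
#if 0
static bool
example_valid_auto_inc_p (machine_mode mode, rtx addr)
{
  /* PRE_INC/PRE_DEC share one addr_mask bit; PRE_MODIFY has its own.  */
  if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
    return mode_supports_pre_incdec_p (mode);
  if (GET_CODE (addr) == PRE_MODIFY)
    return mode_supports_pre_modify_p (mode);
  return true;
}
#endif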
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
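
/* Note: like the generic store_data_bypass_p, this predicate is intended for
   use as a guard in define_bypass entries in the scheduling descriptions;
   the extra checks above only exist so such guards never trip the generic
   routine's assertions.  */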
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}
/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
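
/* For example, the ISA 3.0 lxv/stxv instructions take a DQ-form
   displacement, so only offsets that are a multiple of 16 can be encoded
   directly.  */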
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
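
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) is 0x20000000 (%v2).  */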
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1699 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1700 rs6000_builtin_support_vector_misalignment
1701 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1702 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1703 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1704 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1705 rs6000_builtin_vectorization_cost
1706 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1707 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1708 rs6000_preferred_simd_mode
1709 #undef TARGET_VECTORIZE_INIT_COST
1710 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1711 #undef TARGET_VECTORIZE_ADD_STMT_COST
1712 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1713 #undef TARGET_VECTORIZE_FINISH_COST
1714 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1715 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1716 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1718 #undef TARGET_INIT_BUILTINS
1719 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1720 #undef TARGET_BUILTIN_DECL
1721 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1723 #undef TARGET_FOLD_BUILTIN
1724 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1725 #undef TARGET_GIMPLE_FOLD_BUILTIN
1726 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1728 #undef TARGET_EXPAND_BUILTIN
1729 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1731 #undef TARGET_MANGLE_TYPE
1732 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1734 #undef TARGET_INIT_LIBFUNCS
1735 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1737 #if TARGET_MACHO
1738 #undef TARGET_BINDS_LOCAL_P
1739 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1740 #endif
1742 #undef TARGET_MS_BITFIELD_LAYOUT_P
1743 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1745 #undef TARGET_ASM_OUTPUT_MI_THUNK
1746 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1748 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1749 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1751 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1752 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1754 #undef TARGET_REGISTER_MOVE_COST
1755 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1756 #undef TARGET_MEMORY_MOVE_COST
1757 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1758 #undef TARGET_CANNOT_COPY_INSN_P
1759 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1760 #undef TARGET_RTX_COSTS
1761 #define TARGET_RTX_COSTS rs6000_rtx_costs
1762 #undef TARGET_ADDRESS_COST
1763 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1765 #undef TARGET_DWARF_REGISTER_SPAN
1766 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1768 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1769 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1771 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1772 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1774 #undef TARGET_PROMOTE_FUNCTION_MODE
1775 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1777 #undef TARGET_RETURN_IN_MEMORY
1778 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1780 #undef TARGET_RETURN_IN_MSB
1781 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1783 #undef TARGET_SETUP_INCOMING_VARARGS
1784 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1786 /* Always strict argument naming on rs6000. */
1787 #undef TARGET_STRICT_ARGUMENT_NAMING
1788 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1789 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1790 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1791 #undef TARGET_SPLIT_COMPLEX_ARG
1792 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1793 #undef TARGET_MUST_PASS_IN_STACK
1794 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1795 #undef TARGET_PASS_BY_REFERENCE
1796 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1797 #undef TARGET_ARG_PARTIAL_BYTES
1798 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1799 #undef TARGET_FUNCTION_ARG_ADVANCE
1800 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1801 #undef TARGET_FUNCTION_ARG
1802 #define TARGET_FUNCTION_ARG rs6000_function_arg
1803 #undef TARGET_FUNCTION_ARG_PADDING
1804 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1805 #undef TARGET_FUNCTION_ARG_BOUNDARY
1806 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1808 #undef TARGET_BUILD_BUILTIN_VA_LIST
1809 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1811 #undef TARGET_EXPAND_BUILTIN_VA_START
1812 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1814 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1815 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1817 #undef TARGET_EH_RETURN_FILTER_MODE
1818 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1820 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1821 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1823 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1824 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1826 #undef TARGET_FLOATN_MODE
1827 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1829 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1830 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1832 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1833 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1835 #undef TARGET_MD_ASM_ADJUST
1836 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1838 #undef TARGET_OPTION_OVERRIDE
1839 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1841 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1842 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1843 rs6000_builtin_vectorized_function
1845 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1846 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1847 rs6000_builtin_md_vectorized_function
1849 #undef TARGET_STACK_PROTECT_GUARD
1850 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1852 #if !TARGET_MACHO
1853 #undef TARGET_STACK_PROTECT_FAIL
1854 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1855 #endif
1857 #ifdef HAVE_AS_TLS
1858 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1859 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1860 #endif
1862 /* Use a 32-bit anchor range. This leads to sequences like:
1864 addis tmp,anchor,high
1865 add dest,tmp,low
1867 where tmp itself acts as an anchor, and can be shared between
1868 accesses to the same 64k page. */
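/* Worked example (illustrative): to reach anchor + 0x1234abcd, the
   offset splits as
     high = (0x1234abcd + 0x8000) >> 16 = 0x1235
     low  = 0x1234abcd - (high << 16)   = -0x5433
   so "addis tmp,anchor,0x1235" plus an access at tmp-0x5433 reaches the
   original address, and tmp is reusable for anything in that 64k
   page.  */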
1869 #undef TARGET_MIN_ANCHOR_OFFSET
1870 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1871 #undef TARGET_MAX_ANCHOR_OFFSET
1872 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1873 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1874 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1875 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1876 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1878 #undef TARGET_BUILTIN_RECIPROCAL
1879 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1881 #undef TARGET_EXPAND_TO_RTL_HOOK
1882 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1884 #undef TARGET_INSTANTIATE_DECLS
1885 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1887 #undef TARGET_SECONDARY_RELOAD
1888 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1889 #undef TARGET_SECONDARY_MEMORY_NEEDED
1890 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1891 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1892 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1894 #undef TARGET_LEGITIMATE_ADDRESS_P
1895 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1897 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1898 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1900 #undef TARGET_LRA_P
1901 #define TARGET_LRA_P rs6000_lra_p
1903 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1904 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1906 #undef TARGET_CAN_ELIMINATE
1907 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1909 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1910 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1912 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1913 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1915 #undef TARGET_TRAMPOLINE_INIT
1916 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1918 #undef TARGET_FUNCTION_VALUE
1919 #define TARGET_FUNCTION_VALUE rs6000_function_value
1921 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1922 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1924 #undef TARGET_OPTION_SAVE
1925 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1927 #undef TARGET_OPTION_RESTORE
1928 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1930 #undef TARGET_OPTION_PRINT
1931 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1933 #undef TARGET_CAN_INLINE_P
1934 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1936 #undef TARGET_SET_CURRENT_FUNCTION
1937 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1939 #undef TARGET_LEGITIMATE_CONSTANT_P
1940 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1942 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1943 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1945 #undef TARGET_CAN_USE_DOLOOP_P
1946 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1948 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1949 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1951 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1952 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1953 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1954 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1955 #undef TARGET_UNWIND_WORD_MODE
1956 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1958 #undef TARGET_OFFLOAD_OPTIONS
1959 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1961 #undef TARGET_C_MODE_FOR_SUFFIX
1962 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1964 #undef TARGET_INVALID_BINARY_OP
1965 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1967 #undef TARGET_OPTAB_SUPPORTED_P
1968 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1970 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1971 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1973 #undef TARGET_HARD_REGNO_NREGS
1974 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1975 #undef TARGET_HARD_REGNO_MODE_OK
1976 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1978 #undef TARGET_MODES_TIEABLE_P
1979 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1981 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1982 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1983 rs6000_hard_regno_call_part_clobbered
1985 #undef TARGET_SLOW_UNALIGNED_ACCESS
1986 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1988 #undef TARGET_CAN_CHANGE_MODE_CLASS
1989 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1991 #undef TARGET_CONSTANT_ALIGNMENT
1992 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1994 #undef TARGET_STARTING_FRAME_OFFSET
1995 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1998 /* Processor table. */
1999 struct rs6000_ptt
2001 const char *const name; /* Canonical processor name. */
2002 const enum processor_type processor; /* Processor type enum value. */
2003 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
2006 static struct rs6000_ptt const processor_target_table[] =
2008 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2009 #include "powerpcspe-cpus.def"
2010 #undef RS6000_CPU
2013 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2014 name is invalid. */
2016 static int
2017 rs6000_cpu_name_lookup (const char *name)
2019 size_t i;
2021 if (name != NULL)
2023 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2024 if (! strcmp (name, processor_target_table[i].name))
2025 return (int)i;
2028 return -1;
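/* Example (the actual names come from powerpcspe-cpus.def): assuming
   the usual table, rs6000_cpu_name_lookup ("8548") returns the index of
   the e500v2 entry, while rs6000_cpu_name_lookup ("not-a-cpu") returns
   -1.  */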
2032 /* Return number of consecutive hard regs needed starting at reg REGNO
2033 to hold something of mode MODE.
2034 This is ordinarily the length in words of a value of mode MODE
2035 but can be less for certain modes in special long registers.
2037 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2038 scalar instructions. The upper 32 bits are only available to the
2039 SIMD instructions.
2041 POWER and PowerPC GPRs hold 32 bits worth;
2042 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2044 static int
2045 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2047 unsigned HOST_WIDE_INT reg_size;
2049 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2050 128-bit floating point that can go in vector registers, which has VSX
2051 memory addressing. */
2052 if (FP_REGNO_P (regno))
2053 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2054 ? UNITS_PER_VSX_WORD
2055 : UNITS_PER_FP_WORD);
2057 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2058 reg_size = UNITS_PER_SPE_WORD;
2060 else if (ALTIVEC_REGNO_P (regno))
2061 reg_size = UNITS_PER_ALTIVEC_WORD;
2063 /* The value returned for SCmode in the E500 double case is 2 for
2064 ABI compatibility; storing an SCmode value in a single register
2065 would require function_arg and rs6000_spe_function_arg to handle
2066 SCmode so as to pass the value correctly in a pair of
2067 registers. */
2068 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
2069 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
2070 reg_size = UNITS_PER_FP_WORD;
2072 else
2073 reg_size = UNITS_PER_WORD;
2075 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
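/* Worked example: on a 32-bit target (UNITS_PER_WORD == 4), a DFmode
   value in GPRs needs (8 + 4 - 1) / 4 == 2 consecutive registers, while
   the same value in an FPR (reg_size == 8) needs only one.  */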
2078 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2079 MODE. */
2080 static int
2081 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2083 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2085 if (COMPLEX_MODE_P (mode))
2086 mode = GET_MODE_INNER (mode);
2088 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2089 register combinations, and use PTImode where we need to deal with quad
2090 word memory operations. Don't allow quad words in the argument or frame
2091 pointer registers, just registers 0..31. */
2092 if (mode == PTImode)
2093 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2094 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2095 && ((regno & 1) == 0));
2097 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2098 implementations. Don't allow an item to be split between a FP register
2099 and an Altivec register. Allow TImode in all VSX registers if the user
2100 asked for it. */
2101 if (TARGET_VSX && VSX_REGNO_P (regno)
2102 && (VECTOR_MEM_VSX_P (mode)
2103 || FLOAT128_VECTOR_P (mode)
2104 || reg_addr[mode].scalar_in_vmx_p
2105 || (TARGET_VSX_TIMODE && mode == TImode)
2106 || (TARGET_VADDUQM && mode == V1TImode)))
2108 if (FP_REGNO_P (regno))
2109 return FP_REGNO_P (last_regno);
2111 if (ALTIVEC_REGNO_P (regno))
2113 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2114 return 0;
2116 return ALTIVEC_REGNO_P (last_regno);
2120 /* The GPRs can hold any mode, but values bigger than one register
2121 cannot go past R31. */
2122 if (INT_REGNO_P (regno))
2123 return INT_REGNO_P (last_regno);
2125 /* The float registers (except for VSX vector modes) can only hold floating
2126 modes and DImode. */
2127 if (FP_REGNO_P (regno))
2129 if (FLOAT128_VECTOR_P (mode))
2130 return false;
2132 if (SCALAR_FLOAT_MODE_P (mode)
2133 && (mode != TDmode || (regno % 2) == 0)
2134 && FP_REGNO_P (last_regno))
2135 return 1;
2137 if (GET_MODE_CLASS (mode) == MODE_INT)
2139 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2140 return 1;
2142 if (TARGET_VSX_SMALL_INTEGER)
2144 if (mode == SImode)
2145 return 1;
2147 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2148 return 1;
2152 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2153 && PAIRED_VECTOR_MODE (mode))
2154 return 1;
2156 return 0;
2159 /* The CR register can only hold CC modes. */
2160 if (CR_REGNO_P (regno))
2161 return GET_MODE_CLASS (mode) == MODE_CC;
2163 if (CA_REGNO_P (regno))
2164 return mode == Pmode || mode == SImode;
2166 /* AltiVec modes only in AltiVec registers. */
2167 if (ALTIVEC_REGNO_P (regno))
2168 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2169 || mode == V1TImode);
2171 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2172 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2173 return 1;
2175 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2176 and it must be able to fit within the register set. */
2178 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2181 /* Implement TARGET_HARD_REGNO_NREGS. */
2183 static unsigned int
2184 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2186 return rs6000_hard_regno_nregs[mode][regno];
2189 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2191 static bool
2192 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2194 return rs6000_hard_regno_mode_ok_p[mode][regno];
2197 /* Implement TARGET_MODES_TIEABLE_P.
2199 PTImode cannot tie with other modes because PTImode is restricted to even
2200 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2201 57744).
2203 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2204 128-bit floating point on VSX systems ties with other vectors. */
2206 static bool
2207 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2209 if (mode1 == PTImode)
2210 return mode2 == PTImode;
2211 if (mode2 == PTImode)
2212 return false;
2214 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2215 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2216 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2217 return false;
2219 if (SCALAR_FLOAT_MODE_P (mode1))
2220 return SCALAR_FLOAT_MODE_P (mode2);
2221 if (SCALAR_FLOAT_MODE_P (mode2))
2222 return false;
2224 if (GET_MODE_CLASS (mode1) == MODE_CC)
2225 return GET_MODE_CLASS (mode2) == MODE_CC;
2226 if (GET_MODE_CLASS (mode2) == MODE_CC)
2227 return false;
2229 if (SPE_VECTOR_MODE (mode1))
2230 return SPE_VECTOR_MODE (mode2);
2231 if (SPE_VECTOR_MODE (mode2))
2232 return false;
2234 return true;
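/* Consequently V4SImode and V2DFmode tie (both Altivec/VSX vector
   modes), SFmode ties with DFmode (both scalar float), but TImode never
   ties with PTImode, and CC modes tie only with other CC modes.  */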
2237 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2239 static bool
2240 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2242 if (TARGET_32BIT
2243 && TARGET_POWERPC64
2244 && GET_MODE_SIZE (mode) > 4
2245 && INT_REGNO_P (regno))
2246 return true;
2248 if (TARGET_VSX
2249 && FP_REGNO_P (regno)
2250 && GET_MODE_SIZE (mode) > 8
2251 && !FLOAT128_2REG_P (mode))
2252 return true;
2254 return false;
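/* For example, with -m32 -mpowerpc64 a DImode value (8 bytes, wider
   than the 4-byte ABI word) held in a GPR is partially clobbered by
   calls, since the 32-bit ABIs only preserve the low 32 bits.  */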
2257 /* Print interesting facts about registers. */
2258 static void
2259 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2261 int r, m;
2263 for (r = first_regno; r <= last_regno; ++r)
2265 const char *comma = "";
2266 int len;
2268 if (first_regno == last_regno)
2269 fprintf (stderr, "%s:\t", reg_name);
2270 else
2271 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2273 len = 8;
2274 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2275 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2277 if (len > 70)
2279 fprintf (stderr, ",\n\t");
2280 len = 8;
2281 comma = "";
2284 if (rs6000_hard_regno_nregs[m][r] > 1)
2285 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2286 rs6000_hard_regno_nregs[m][r]);
2287 else
2288 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2290 comma = ", ";
2293 if (call_used_regs[r])
2295 if (len > 70)
2297 fprintf (stderr, ",\n\t");
2298 len = 8;
2299 comma = "";
2302 len += fprintf (stderr, "%s%s", comma, "call-used");
2303 comma = ", ";
2306 if (fixed_regs[r])
2308 if (len > 70)
2310 fprintf (stderr, ",\n\t");
2311 len = 8;
2312 comma = "";
2315 len += fprintf (stderr, "%s%s", comma, "fixed");
2316 comma = ", ";
2319 if (len > 70)
2321 fprintf (stderr, ",\n\t");
2322 comma = "";
2325 len += fprintf (stderr, "%sreg-class = %s", comma,
2326 reg_class_names[(int)rs6000_regno_regclass[r]]);
2327 comma = ", ";
2329 if (len > 70)
2331 fprintf (stderr, ",\n\t");
2332 comma = "";
2335 fprintf (stderr, "%sregno = %d\n", comma, r);
2339 static const char *
2340 rs6000_debug_vector_unit (enum rs6000_vector v)
2342 const char *ret;
2344 switch (v)
2346 case VECTOR_NONE: ret = "none"; break;
2347 case VECTOR_ALTIVEC: ret = "altivec"; break;
2348 case VECTOR_VSX: ret = "vsx"; break;
2349 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2350 case VECTOR_PAIRED: ret = "paired"; break;
2351 case VECTOR_SPE: ret = "spe"; break;
2352 case VECTOR_OTHER: ret = "other"; break;
2353 default: ret = "unknown"; break;
2356 return ret;
2359 /* Inner function printing just the address mask for a particular reload
2360 register class. */
2361 DEBUG_FUNCTION char *
2362 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2364 static char ret[8];
2365 char *p = ret;
2367 if ((mask & RELOAD_REG_VALID) != 0)
2368 *p++ = 'v';
2369 else if (keep_spaces)
2370 *p++ = ' ';
2372 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2373 *p++ = 'm';
2374 else if (keep_spaces)
2375 *p++ = ' ';
2377 if ((mask & RELOAD_REG_INDEXED) != 0)
2378 *p++ = 'i';
2379 else if (keep_spaces)
2380 *p++ = ' ';
2382 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2383 *p++ = 'O';
2384 else if ((mask & RELOAD_REG_OFFSET) != 0)
2385 *p++ = 'o';
2386 else if (keep_spaces)
2387 *p++ = ' ';
2389 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2390 *p++ = '+';
2391 else if (keep_spaces)
2392 *p++ = ' ';
2394 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2395 *p++ = '+';
2396 else if (keep_spaces)
2397 *p++ = ' ';
2399 if ((mask & RELOAD_REG_AND_M16) != 0)
2400 *p++ = '&';
2401 else if (keep_spaces)
2402 *p++ = ' ';
2404 *p = '\0';
2406 return ret;
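/* Sample output (assuming keep_spaces): a mask of RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET prints as "v io" followed by
   three trailing blanks, i.e. valid, REG+REG, REG+OFFSET, and none of
   the pre-increment/pre-modify/AND -16 forms.  */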
2409 /* Print the address masks in a human readable fashion. */
2410 DEBUG_FUNCTION void
2411 rs6000_debug_print_mode (ssize_t m)
2413 ssize_t rc;
2414 int spaces = 0;
2415 bool fuse_extra_p;
2417 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2418 for (rc = 0; rc < N_RELOAD_REG; rc++)
2419 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2420 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2422 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2423 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2424 fprintf (stderr, " Reload=%c%c",
2425 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2426 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2427 else
2428 spaces += sizeof (" Reload=sl") - 1;
2430 if (reg_addr[m].scalar_in_vmx_p)
2432 fprintf (stderr, "%*s Upper=y", spaces, "");
2433 spaces = 0;
2435 else
2436 spaces += sizeof (" Upper=y") - 1;
2438 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2439 || reg_addr[m].fused_toc);
2440 if (!fuse_extra_p)
2442 for (rc = 0; rc < N_RELOAD_REG; rc++)
2444 if (rc != RELOAD_REG_ANY)
2446 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2448 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2449 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2450 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2452 fuse_extra_p = true;
2453 break;
2459 if (fuse_extra_p)
2461 fprintf (stderr, "%*s Fuse:", spaces, "");
2462 spaces = 0;
2464 for (rc = 0; rc < N_RELOAD_REG; rc++)
2466 if (rc != RELOAD_REG_ANY)
2468 char load, store;
2470 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2471 load = 'l';
2472 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2473 load = 'L';
2474 else
2475 load = '-';
2477 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2478 store = 's';
2479 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2480 store = 'S';
2481 else
2482 store = '-';
2484 if (load == '-' && store == '-')
2485 spaces += 5;
2486 else
2488 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2489 reload_reg_map[rc].name[0], load, store);
2490 spaces = 0;
2495 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2497 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2498 spaces = 0;
2500 else
2501 spaces += sizeof (" P8gpr") - 1;
2503 if (reg_addr[m].fused_toc)
2505 fprintf (stderr, "%*sToc", (spaces + 1), "");
2506 spaces = 0;
2508 else
2509 spaces += sizeof (" Toc") - 1;
2511 else
2512 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2514 if (rs6000_vector_unit[m] != VECTOR_NONE
2515 || rs6000_vector_mem[m] != VECTOR_NONE)
2517 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2518 spaces, "",
2519 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2520 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2523 fputs ("\n", stderr);
2526 #define DEBUG_FMT_ID "%-32s= "
2527 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2528 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2529 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
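/* Example: fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints "abi"
   left-justified in a 32-column field followed by "= ELFv2", which is
   what keeps the -mdebug=reg output below aligned.  */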
2531 /* Print various interesting information with -mdebug=reg. */
2532 static void
2533 rs6000_debug_reg_global (void)
2535 static const char *const tf[2] = { "false", "true" };
2536 const char *nl = (const char *)0;
2537 int m;
2538 size_t m1, m2, v;
2539 char costly_num[20];
2540 char nop_num[20];
2541 char flags_buffer[40];
2542 const char *costly_str;
2543 const char *nop_str;
2544 const char *trace_str;
2545 const char *abi_str;
2546 const char *cmodel_str;
2547 struct cl_target_option cl_opts;
2549 /* Modes we want tieable information on. */
2550 static const machine_mode print_tieable_modes[] = {
2551 QImode,
2552 HImode,
2553 SImode,
2554 DImode,
2555 TImode,
2556 PTImode,
2557 SFmode,
2558 DFmode,
2559 TFmode,
2560 IFmode,
2561 KFmode,
2562 SDmode,
2563 DDmode,
2564 TDmode,
2565 V8QImode,
2566 V4HImode,
2567 V2SImode,
2568 V16QImode,
2569 V8HImode,
2570 V4SImode,
2571 V2DImode,
2572 V1TImode,
2573 V32QImode,
2574 V16HImode,
2575 V8SImode,
2576 V4DImode,
2577 V2TImode,
2578 V2SFmode,
2579 V4SFmode,
2580 V2DFmode,
2581 V8SFmode,
2582 V4DFmode,
2583 CCmode,
2584 CCUNSmode,
2585 CCEQmode,
2588 /* Virtual regs we are interested in. */
2589 const static struct {
2590 int regno; /* register number. */
2591 const char *name; /* register name. */
2592 } virtual_regs[] = {
2593 { STACK_POINTER_REGNUM, "stack pointer:" },
2594 { TOC_REGNUM, "toc: " },
2595 { STATIC_CHAIN_REGNUM, "static chain: " },
2596 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2597 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2598 { ARG_POINTER_REGNUM, "arg pointer: " },
2599 { FRAME_POINTER_REGNUM, "frame pointer:" },
2600 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2601 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2602 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2603 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2604 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2605 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2606 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2607 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2608 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2611 fputs ("\nHard register information:\n", stderr);
2612 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2613 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2614 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2615 LAST_ALTIVEC_REGNO,
2616 "vs");
2617 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2618 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2619 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2620 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2621 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2622 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2623 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2624 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2626 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2627 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2628 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2630 fprintf (stderr,
2631 "\n"
2632 "d reg_class = %s\n"
2633 "f reg_class = %s\n"
2634 "v reg_class = %s\n"
2635 "wa reg_class = %s\n"
2636 "wb reg_class = %s\n"
2637 "wd reg_class = %s\n"
2638 "we reg_class = %s\n"
2639 "wf reg_class = %s\n"
2640 "wg reg_class = %s\n"
2641 "wh reg_class = %s\n"
2642 "wi reg_class = %s\n"
2643 "wj reg_class = %s\n"
2644 "wk reg_class = %s\n"
2645 "wl reg_class = %s\n"
2646 "wm reg_class = %s\n"
2647 "wo reg_class = %s\n"
2648 "wp reg_class = %s\n"
2649 "wq reg_class = %s\n"
2650 "wr reg_class = %s\n"
2651 "ws reg_class = %s\n"
2652 "wt reg_class = %s\n"
2653 "wu reg_class = %s\n"
2654 "wv reg_class = %s\n"
2655 "ww reg_class = %s\n"
2656 "wx reg_class = %s\n"
2657 "wy reg_class = %s\n"
2658 "wz reg_class = %s\n"
2659 "wA reg_class = %s\n"
2660 "wH reg_class = %s\n"
2661 "wI reg_class = %s\n"
2662 "wJ reg_class = %s\n"
2663 "wK reg_class = %s\n"
2664 "\n",
2665 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2666 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2667 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2668 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2669 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2670 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2671 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2672 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2673 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2674 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2675 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2676 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2677 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2678 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2679 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2680 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2681 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2682 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2683 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2684 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2685 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2686 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2687 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2688 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2689 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2690 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2691 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2692 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2693 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2694 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2695 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2696 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2698 nl = "\n";
2699 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2700 rs6000_debug_print_mode (m);
2702 fputs ("\n", stderr);
2704 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2706 machine_mode mode1 = print_tieable_modes[m1];
2707 bool first_time = true;
2709 nl = (const char *)0;
2710 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2712 machine_mode mode2 = print_tieable_modes[m2];
2713 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2715 if (first_time)
2717 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2718 nl = "\n";
2719 first_time = false;
2722 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2726 if (!first_time)
2727 fputs ("\n", stderr);
2730 if (nl)
2731 fputs (nl, stderr);
2733 if (rs6000_recip_control)
2735 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2737 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2738 if (rs6000_recip_bits[m])
2740 fprintf (stderr,
2741 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2742 GET_MODE_NAME (m),
2743 (RS6000_RECIP_AUTO_RE_P (m)
2744 ? "auto"
2745 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2746 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2747 ? "auto"
2748 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2751 fputs ("\n", stderr);
2754 if (rs6000_cpu_index >= 0)
2756 const char *name = processor_target_table[rs6000_cpu_index].name;
2757 HOST_WIDE_INT flags
2758 = processor_target_table[rs6000_cpu_index].target_enable;
2760 sprintf (flags_buffer, "-mcpu=%s flags", name);
2761 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2763 else
2764 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2766 if (rs6000_tune_index >= 0)
2768 const char *name = processor_target_table[rs6000_tune_index].name;
2769 HOST_WIDE_INT flags
2770 = processor_target_table[rs6000_tune_index].target_enable;
2772 sprintf (flags_buffer, "-mtune=%s flags", name);
2773 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2775 else
2776 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2778 cl_target_option_save (&cl_opts, &global_options);
2779 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2780 rs6000_isa_flags);
2782 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2783 rs6000_isa_flags_explicit);
2785 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2786 rs6000_builtin_mask);
2788 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2790 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2791 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2793 switch (rs6000_sched_costly_dep)
2795 case max_dep_latency:
2796 costly_str = "max_dep_latency";
2797 break;
2799 case no_dep_costly:
2800 costly_str = "no_dep_costly";
2801 break;
2803 case all_deps_costly:
2804 costly_str = "all_deps_costly";
2805 break;
2807 case true_store_to_load_dep_costly:
2808 costly_str = "true_store_to_load_dep_costly";
2809 break;
2811 case store_to_load_dep_costly:
2812 costly_str = "store_to_load_dep_costly";
2813 break;
2815 default:
2816 costly_str = costly_num;
2817 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2818 break;
2821 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2823 switch (rs6000_sched_insert_nops)
2825 case sched_finish_regroup_exact:
2826 nop_str = "sched_finish_regroup_exact";
2827 break;
2829 case sched_finish_pad_groups:
2830 nop_str = "sched_finish_pad_groups";
2831 break;
2833 case sched_finish_none:
2834 nop_str = "sched_finish_none";
2835 break;
2837 default:
2838 nop_str = nop_num;
2839 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2840 break;
2843 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2845 switch (rs6000_sdata)
2847 default:
2848 case SDATA_NONE:
2849 break;
2851 case SDATA_DATA:
2852 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2853 break;
2855 case SDATA_SYSV:
2856 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2857 break;
2859 case SDATA_EABI:
2860 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2861 break;
2865 switch (rs6000_traceback)
2867 case traceback_default: trace_str = "default"; break;
2868 case traceback_none: trace_str = "none"; break;
2869 case traceback_part: trace_str = "part"; break;
2870 case traceback_full: trace_str = "full"; break;
2871 default: trace_str = "unknown"; break;
2874 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2876 switch (rs6000_current_cmodel)
2878 case CMODEL_SMALL: cmodel_str = "small"; break;
2879 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2880 case CMODEL_LARGE: cmodel_str = "large"; break;
2881 default: cmodel_str = "unknown"; break;
2884 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2886 switch (rs6000_current_abi)
2888 case ABI_NONE: abi_str = "none"; break;
2889 case ABI_AIX: abi_str = "aix"; break;
2890 case ABI_ELFv2: abi_str = "ELFv2"; break;
2891 case ABI_V4: abi_str = "V4"; break;
2892 case ABI_DARWIN: abi_str = "darwin"; break;
2893 default: abi_str = "unknown"; break;
2896 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2898 if (rs6000_altivec_abi)
2899 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2901 if (rs6000_spe_abi)
2902 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2904 if (rs6000_darwin64_abi)
2905 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2907 if (rs6000_float_gprs)
2908 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2910 fprintf (stderr, DEBUG_FMT_S, "fprs",
2911 (TARGET_FPRS ? "true" : "false"));
2913 fprintf (stderr, DEBUG_FMT_S, "single_float",
2914 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2916 fprintf (stderr, DEBUG_FMT_S, "double_float",
2917 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2919 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2920 (TARGET_SOFT_FLOAT ? "true" : "false"));
2922 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2923 (TARGET_E500_SINGLE ? "true" : "false"));
2925 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2926 (TARGET_E500_DOUBLE ? "true" : "false"));
2928 if (TARGET_LINK_STACK)
2929 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2931 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2933 if (TARGET_P8_FUSION)
2935 char options[80];
2937 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2938 if (TARGET_TOC_FUSION)
2939 strcat (options, ", toc");
2941 if (TARGET_P8_FUSION_SIGN)
2942 strcat (options, ", sign");
2944 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2947 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2948 TARGET_SECURE_PLT ? "secure" : "bss");
2949 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2950 aix_struct_return ? "aix" : "sysv");
2951 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2952 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2953 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2954 tf[!!rs6000_align_branch_targets]);
2955 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2956 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2957 rs6000_long_double_type_size);
2958 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2959 (int)rs6000_sched_restricted_insns_priority);
2960 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2961 (int)END_BUILTINS);
2962 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2963 (int)RS6000_BUILTIN_COUNT);
2965 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2966 (int)TARGET_FLOAT128_ENABLE_TYPE);
2968 if (TARGET_VSX)
2969 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2970 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2972 if (TARGET_DIRECT_MOVE_128)
2973 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2974 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
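/* All of the information above is written to stderr when compiling with
   -mdebug=reg, e.g. (illustrative command line):

     gcc -S -mdebug=reg test.c  */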
2978 /* Update the addr mask bits in reg_addr to help secondary reload and the
2979 legitimate address support figure out the appropriate addressing to
2980 use. */
2982 static void
2983 rs6000_setup_reg_addr_masks (void)
2985 ssize_t rc, reg, m, nregs;
2986 addr_mask_type any_addr_mask, addr_mask;
2988 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2990 machine_mode m2 = (machine_mode) m;
2991 bool complex_p = false;
2992 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2993 size_t msize;
2995 if (COMPLEX_MODE_P (m2))
2997 complex_p = true;
2998 m2 = GET_MODE_INNER (m2);
3001 msize = GET_MODE_SIZE (m2);
3003 /* SDmode is special in that we want to access it only via REG+REG
3004 addressing on power7 and above, since we want to use the LFIWZX and
3005 STFIWZX instructions to load it. */
3006 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
3008 any_addr_mask = 0;
3009 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
3011 addr_mask = 0;
3012 reg = reload_reg_map[rc].reg;
3014 /* Can mode values go in the GPR/FPR/Altivec registers? */
3015 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
3017 bool small_int_vsx_p = (small_int_p
3018 && (rc == RELOAD_REG_FPR
3019 || rc == RELOAD_REG_VMX));
3021 nregs = rs6000_hard_regno_nregs[m][reg];
3022 addr_mask |= RELOAD_REG_VALID;
3024 /* Indicate if the mode takes more than 1 physical register. If
3025 it takes a single register, indicate it can do REG+REG
3026 addressing. Small integers in VSX registers can only do
3027 REG+REG addressing. */
3028 if (small_int_vsx_p)
3029 addr_mask |= RELOAD_REG_INDEXED;
3030 else if (nregs > 1 || m == BLKmode || complex_p)
3031 addr_mask |= RELOAD_REG_MULTIPLE;
3032 else
3033 addr_mask |= RELOAD_REG_INDEXED;
3035 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
3036 addressing. Restrict addressing on SPE for 64-bit types
3037 because of the SUBREG hackery used to address 64-bit floats in
3038 '32-bit' GPRs. If we allow scalars into Altivec registers,
3039 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
3041 if (TARGET_UPDATE
3042 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
3043 && msize <= 8
3044 && !VECTOR_MODE_P (m2)
3045 && !FLOAT128_VECTOR_P (m2)
3046 && !complex_p
3047 && !small_int_vsx_p
3048 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
3049 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
3050 && !(TARGET_E500_DOUBLE && msize == 8))
3052 addr_mask |= RELOAD_REG_PRE_INCDEC;
3054 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3055 we don't allow PRE_MODIFY for some multi-register
3056 operations. */
3057 switch (m)
3059 default:
3060 addr_mask |= RELOAD_REG_PRE_MODIFY;
3061 break;
3063 case E_DImode:
3064 if (TARGET_POWERPC64)
3065 addr_mask |= RELOAD_REG_PRE_MODIFY;
3066 break;
3068 case E_DFmode:
3069 case E_DDmode:
3070 if (TARGET_DF_INSN)
3071 addr_mask |= RELOAD_REG_PRE_MODIFY;
3072 break;
3077 /* GPR and FPR registers can do REG+OFFSET addressing, except
3078 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3079 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3080 if ((addr_mask != 0) && !indexed_only_p
3081 && msize <= 8
3082 && (rc == RELOAD_REG_GPR
3083 || ((msize == 8 || m2 == SFmode)
3084 && (rc == RELOAD_REG_FPR
3085 || (rc == RELOAD_REG_VMX
3086 && TARGET_P9_DFORM_SCALAR)))))
3087 addr_mask |= RELOAD_REG_OFFSET;
3089 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3090 instructions are enabled. The offset for 128-bit VSX registers is
3091 only 12 bits. While GPRs can handle the full offset range, VSX
3092 registers can only handle the restricted range. */
3093 else if ((addr_mask != 0) && !indexed_only_p
3094 && msize == 16 && TARGET_P9_DFORM_VECTOR
3095 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3096 || (m2 == TImode && TARGET_VSX_TIMODE)))
3098 addr_mask |= RELOAD_REG_OFFSET;
3099 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3100 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3103 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3104 addressing on 128-bit types. */
3105 if (rc == RELOAD_REG_VMX && msize == 16
3106 && (addr_mask & RELOAD_REG_VALID) != 0)
3107 addr_mask |= RELOAD_REG_AND_M16;
3109 reg_addr[m].addr_mask[rc] = addr_mask;
3110 any_addr_mask |= addr_mask;
3113 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
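/* Illustrative result: a 16-byte vector mode in the VMX reload class
   ends up with at least RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_AND_M16 set; it fits in one register (so REG+REG is
   allowed), and Altivec memory instructions ignore the low four address
   bits (hence the AND -16 form).  */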
3118 /* Initialize the various global tables that are based on register size. */
3119 static void
3120 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3122 ssize_t r, m, c;
3123 int align64;
3124 int align32;
3126 /* Precalculate REGNO_REG_CLASS. */
3127 rs6000_regno_regclass[0] = GENERAL_REGS;
3128 for (r = 1; r < 32; ++r)
3129 rs6000_regno_regclass[r] = BASE_REGS;
3131 for (r = 32; r < 64; ++r)
3132 rs6000_regno_regclass[r] = FLOAT_REGS;
3134 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3135 rs6000_regno_regclass[r] = NO_REGS;
3137 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3138 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3140 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3141 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3142 rs6000_regno_regclass[r] = CR_REGS;
3144 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3145 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3146 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3147 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3148 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3149 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
3150 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
3151 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3152 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3153 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3154 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3155 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3157 /* Precalculate register class to simpler reload register class. We don't
3158 need all of the register classes that are combinations of different
3159 classes, just the simple ones that have constraint letters. */
3160 for (c = 0; c < N_REG_CLASSES; c++)
3161 reg_class_to_reg_type[c] = NO_REG_TYPE;
3163 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3164 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3165 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3166 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3167 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3168 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3169 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3170 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3171 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3172 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3173 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
3174 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
3176 if (TARGET_VSX)
3178 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3179 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3181 else
3183 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3184 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3187 /* Precalculate the valid memory formats as well as the vector information;
3188 this must be set up before the rs6000_hard_regno_nregs_internal calls
3189 below. */
3190 gcc_assert ((int)VECTOR_NONE == 0);
3191 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3192 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3194 gcc_assert ((int)CODE_FOR_nothing == 0);
3195 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3197 gcc_assert ((int)NO_REGS == 0);
3198 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3200 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
3201 controls whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
3202 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3204 align64 = 64;
3205 align32 = 32;
3207 else
3209 align64 = 128;
3210 align32 = 128;
3213 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3214 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3215 if (TARGET_FLOAT128_TYPE)
3217 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3218 rs6000_vector_align[KFmode] = 128;
3220 if (FLOAT128_IEEE_P (TFmode))
3222 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3223 rs6000_vector_align[TFmode] = 128;
3227 /* V2DF mode, VSX only. */
3228 if (TARGET_VSX)
3230 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3231 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3232 rs6000_vector_align[V2DFmode] = align64;
3235 /* V4SF mode, either VSX or Altivec. */
3236 if (TARGET_VSX)
3238 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3239 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3240 rs6000_vector_align[V4SFmode] = align32;
3242 else if (TARGET_ALTIVEC)
3244 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3245 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3246 rs6000_vector_align[V4SFmode] = align32;
3249 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3250 and stores. */
3251 if (TARGET_ALTIVEC)
3253 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3254 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3255 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3256 rs6000_vector_align[V4SImode] = align32;
3257 rs6000_vector_align[V8HImode] = align32;
3258 rs6000_vector_align[V16QImode] = align32;
3260 if (TARGET_VSX)
3262 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3263 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3264 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3266 else
3268 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3269 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3270 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3274 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3275 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3276 if (TARGET_VSX)
3278 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3279 rs6000_vector_unit[V2DImode]
3280 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3281 rs6000_vector_align[V2DImode] = align64;
3283 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3284 rs6000_vector_unit[V1TImode]
3285 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3286 rs6000_vector_align[V1TImode] = 128;
3289 /* DFmode, see if we want to use the VSX unit. Memory is handled
3290 differently, so don't set rs6000_vector_mem. */
3291 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3293 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3294 rs6000_vector_align[DFmode] = 64;
3297 /* SFmode, see if we want to use the VSX unit. */
3298 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3300 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3301 rs6000_vector_align[SFmode] = 32;
3304 /* Allow TImode in VSX register and set the VSX memory macros. */
3305 if (TARGET_VSX && TARGET_VSX_TIMODE)
3307 rs6000_vector_mem[TImode] = VECTOR_VSX;
3308 rs6000_vector_align[TImode] = align64;
3311 /* TODO add SPE and paired floating point vector support. */
3313 /* Register class constraints for the constraints that depend on compile
3314 switches. When the VSX code was added, different constraints were added
3315 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3316 of the VSX registers are used. The register classes for scalar floating
3317 point types are set, based on whether we allow that type into the upper
3318 (Altivec) registers. GCC has register classes to target the Altivec
3319 registers for load/store operations, to select using a VSX memory
3320 operation instead of the traditional floating point operation. The
3321 constraints are:
3323 d - Register class to use with traditional DFmode instructions.
3324 f - Register class to use with traditional SFmode instructions.
3325 v - Altivec register.
3326 wa - Any VSX register.
3327 wc - Reserved to represent individual CR bits (used in LLVM).
3328 wd - Preferred register class for V2DFmode.
3329 wf - Preferred register class for V4SFmode.
3330 wg - Float register for power6x move insns.
3331 wh - FP register for direct move instructions.
3332 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3333 wj - FP or VSX register to hold 64-bit integers for direct moves.
3334 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3335 wl - Float register if we can do 32-bit signed int loads.
3336 wm - VSX register for ISA 2.07 direct move operations.
3337 wn - always NO_REGS.
3338 wr - GPR if 64-bit mode is permitted.
3339 ws - Register class to do ISA 2.06 DF operations.
3340 wt - VSX register for TImode in VSX registers.
3341 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3342 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3343 ww - Register class to do SF conversions in with VSX operations.
3344 wx - Float register if we can do 32-bit int stores.
3345 wy - Register class to do ISA 2.07 SF operations.
3346 wz - Float register if we can do 32-bit unsigned int loads.
3347 wH - Altivec register if SImode is allowed in VSX registers.
3348 wI - VSX register if SImode is allowed in VSX registers.
3349 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3350 wK - Altivec register if QImode/HImode are allowed in VSX registers.  (A usage sketch follows this table.) */
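/* Usage sketch (illustrative, not from this file): a machine
   description pattern can use these letters so that its register class
   tracks the compile-time options, e.g.

     (define_insn "*example_vsx_add"
       [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
             (plus:V2DF (match_operand:V2DF 1 "vsx_register_operand" "wd")
                        (match_operand:V2DF 2 "vsx_register_operand" "wd")))]
       "VECTOR_UNIT_VSX_P (V2DFmode)"
       "xvadddp %x0,%x1,%x2")

   Here "wd" resolves to VSX_REGS when -mvsx is enabled and to NO_REGS
   otherwise, so the pattern quietly disables itself on non-VSX
   targets.  */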
3352 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3353 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3355 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3356 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3358 if (TARGET_VSX)
3360 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3361 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3362 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3364 if (TARGET_VSX_TIMODE)
3365 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3367 if (TARGET_UPPER_REGS_DF) /* DFmode */
3369 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3370 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3372 else
3373 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3375 if (TARGET_UPPER_REGS_DI) /* DImode */
3376 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3377 else
3378 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3381 /* Add conditional constraints based on various options, to allow us to
3382 collapse multiple insn patterns. */
3383 if (TARGET_ALTIVEC)
3384 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3386 if (TARGET_MFPGPR) /* DFmode */
3387 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3389 if (TARGET_LFIWAX)
3390 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3392 if (TARGET_DIRECT_MOVE)
3394 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3395 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3396 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3397 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3398 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3399 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3402 if (TARGET_POWERPC64)
3404 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3405 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3408 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3410 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3411 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3412 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3414 else if (TARGET_P8_VECTOR)
3416 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3417 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3419 else if (TARGET_VSX)
3420 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3422 if (TARGET_STFIWX)
3423 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3425 if (TARGET_LFIWZX)
3426 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3428 if (TARGET_FLOAT128_TYPE)
3430 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3431 if (FLOAT128_IEEE_P (TFmode))
3432 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3435 /* Support for new D-form instructions. */
3436 if (TARGET_P9_DFORM_SCALAR)
3437 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3439 /* Support for ISA 3.0 (power9) vectors. */
3440 if (TARGET_P9_VECTOR)
3441 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3443 /* Support for new direct moves (ISA 3.0 + 64bit). */
3444 if (TARGET_DIRECT_MOVE_128)
3445 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3447 /* Support small integers in VSX registers. */
3448 if (TARGET_VSX_SMALL_INTEGER)
3450 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3451 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3452 if (TARGET_P9_VECTOR)
3454 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3455 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3459 /* Set up the reload helper and direct move functions. */
3460 if (TARGET_VSX || TARGET_ALTIVEC)
3462 if (TARGET_64BIT)
3464 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3465 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3466 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3467 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3468 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3469 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3470 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3471 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3472 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3473 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3474 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3475 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3476 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3477 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3478 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3479 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3480 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3481 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3482 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3483 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3485 if (FLOAT128_VECTOR_P (KFmode))
3487 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3488 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3491 if (FLOAT128_VECTOR_P (TFmode))
3493 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3494 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3497 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3498 available. */
3499 if (TARGET_NO_SDMODE_STACK)
3501 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3502 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3505 if (TARGET_VSX_TIMODE)
3507 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3508 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3511 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3513 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3514 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3515 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3516 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3517 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3518 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3519 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3520 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3521 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3523 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3524 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3525 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3526 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3527 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3528 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3529 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3530 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3531 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3533 if (FLOAT128_VECTOR_P (KFmode))
3535 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3536 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3539 if (FLOAT128_VECTOR_P (TFmode))
3541 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3542 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3546 else
3548 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3549 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3550 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3551 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3552 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3553 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3554 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3555 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3556 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3557 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3558 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3559 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3560 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3561 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3562 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3563 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3564 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3565 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3566 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3567 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3569 if (FLOAT128_VECTOR_P (KFmode))
3571 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3572 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3575 if (FLOAT128_IEEE_P (TFmode))
3577 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3578 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3581 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3582 available. */
3583 if (TARGET_NO_SDMODE_STACK)
3585 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3586 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3589 if (TARGET_VSX_TIMODE)
3591 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3592 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3595 if (TARGET_DIRECT_MOVE)
3597 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3598 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3599 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3603 if (TARGET_UPPER_REGS_DF)
3604 reg_addr[DFmode].scalar_in_vmx_p = true;
3606 if (TARGET_UPPER_REGS_DI)
3607 reg_addr[DImode].scalar_in_vmx_p = true;
3609 if (TARGET_UPPER_REGS_SF)
3610 reg_addr[SFmode].scalar_in_vmx_p = true;
3612 if (TARGET_VSX_SMALL_INTEGER)
3614 reg_addr[SImode].scalar_in_vmx_p = true;
3615 if (TARGET_P9_VECTOR)
3617 reg_addr[HImode].scalar_in_vmx_p = true;
3618 reg_addr[QImode].scalar_in_vmx_p = true;
3623 /* Set up the fusion operations. */
3624 if (TARGET_P8_FUSION)
3626 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3627 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3628 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3629 if (TARGET_64BIT)
3630 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3633 if (TARGET_P9_FUSION)
3635 struct fuse_insns {
3636 enum machine_mode mode; /* mode of the fused type. */
3637 enum machine_mode pmode; /* pointer mode. */
3638 enum rs6000_reload_reg_type rtype; /* register type. */
3639 enum insn_code load; /* load insn. */
3640 enum insn_code store; /* store insn. */
3643 static const struct fuse_insns addis_insns[] = {
3644 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3645 CODE_FOR_fusion_vsx_di_sf_load,
3646 CODE_FOR_fusion_vsx_di_sf_store },
3648 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3649 CODE_FOR_fusion_vsx_si_sf_load,
3650 CODE_FOR_fusion_vsx_si_sf_store },
3652 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3653 CODE_FOR_fusion_vsx_di_df_load,
3654 CODE_FOR_fusion_vsx_di_df_store },
3656 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3657 CODE_FOR_fusion_vsx_si_df_load,
3658 CODE_FOR_fusion_vsx_si_df_store },
3660 { E_DImode, E_DImode, RELOAD_REG_FPR,
3661 CODE_FOR_fusion_vsx_di_di_load,
3662 CODE_FOR_fusion_vsx_di_di_store },
3664 { E_DImode, E_SImode, RELOAD_REG_FPR,
3665 CODE_FOR_fusion_vsx_si_di_load,
3666 CODE_FOR_fusion_vsx_si_di_store },
3668 { E_QImode, E_DImode, RELOAD_REG_GPR,
3669 CODE_FOR_fusion_gpr_di_qi_load,
3670 CODE_FOR_fusion_gpr_di_qi_store },
3672 { E_QImode, E_SImode, RELOAD_REG_GPR,
3673 CODE_FOR_fusion_gpr_si_qi_load,
3674 CODE_FOR_fusion_gpr_si_qi_store },
3676 { E_HImode, E_DImode, RELOAD_REG_GPR,
3677 CODE_FOR_fusion_gpr_di_hi_load,
3678 CODE_FOR_fusion_gpr_di_hi_store },
3680 { E_HImode, E_SImode, RELOAD_REG_GPR,
3681 CODE_FOR_fusion_gpr_si_hi_load,
3682 CODE_FOR_fusion_gpr_si_hi_store },
3684 { E_SImode, E_DImode, RELOAD_REG_GPR,
3685 CODE_FOR_fusion_gpr_di_si_load,
3686 CODE_FOR_fusion_gpr_di_si_store },
3688 { E_SImode, E_SImode, RELOAD_REG_GPR,
3689 CODE_FOR_fusion_gpr_si_si_load,
3690 CODE_FOR_fusion_gpr_si_si_store },
3692 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3693 CODE_FOR_fusion_gpr_di_sf_load,
3694 CODE_FOR_fusion_gpr_di_sf_store },
3696 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3697 CODE_FOR_fusion_gpr_si_sf_load,
3698 CODE_FOR_fusion_gpr_si_sf_store },
3700 { E_DImode, E_DImode, RELOAD_REG_GPR,
3701 CODE_FOR_fusion_gpr_di_di_load,
3702 CODE_FOR_fusion_gpr_di_di_store },
3704 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3705 CODE_FOR_fusion_gpr_di_df_load,
3706 CODE_FOR_fusion_gpr_di_df_store },
3709 machine_mode cur_pmode = Pmode;
3710 size_t i;
3712 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3714 machine_mode xmode = addis_insns[i].mode;
3715 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3717 if (addis_insns[i].pmode != cur_pmode)
3718 continue;
3720 if (rtype == RELOAD_REG_FPR
3721 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3722 continue;
3724 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3725 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3727 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3729 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3730 = addis_insns[i].load;
3731 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3732 = addis_insns[i].store;
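/* For reference, a sketch of what "addis fusion" means (illustrative
   assembly, an editorial example rather than output of this code): the
   addis that forms the high part of an address fuses with the dependent
   memory access, e.g.

     addis r10,r2,sym@toc@ha
     lwz   r9,sym@toc@l(r10)

   The table above records, per mode and register type, which load/store
   patterns we may emit in that fusable shape.  */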
3737 /* Note which types support fusing a TOC setup with a memory insn. We only
3738 do fused TOCs for medium/large code models. */
3739 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3740 && (TARGET_CMODEL != CMODEL_SMALL))
3742 reg_addr[QImode].fused_toc = true;
3743 reg_addr[HImode].fused_toc = true;
3744 reg_addr[SImode].fused_toc = true;
3745 reg_addr[DImode].fused_toc = true;
3746 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3748 if (TARGET_SINGLE_FLOAT)
3749 reg_addr[SFmode].fused_toc = true;
3750 if (TARGET_DOUBLE_FLOAT)
3751 reg_addr[DFmode].fused_toc = true;
3755 /* Precalculate HARD_REGNO_NREGS. */
3756 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3757 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3758 rs6000_hard_regno_nregs[m][r]
3759 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3761 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3762 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3763 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3764 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3765 rs6000_hard_regno_mode_ok_p[m][r] = true;
3767 /* Precalculate CLASS_MAX_NREGS sizes. */
3768 for (c = 0; c < LIM_REG_CLASSES; ++c)
3770 int reg_size;
3772 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3773 reg_size = UNITS_PER_VSX_WORD;
3775 else if (c == ALTIVEC_REGS)
3776 reg_size = UNITS_PER_ALTIVEC_WORD;
3778 else if (c == FLOAT_REGS)
3779 reg_size = UNITS_PER_FP_WORD;
3781 else
3782 reg_size = UNITS_PER_WORD;
3784 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3786 machine_mode m2 = (machine_mode)m;
3787 int reg_size2 = reg_size;
3789 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3790 in VSX. */
3791 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3792 reg_size2 = UNITS_PER_FP_WORD;
3794 rs6000_class_max_nregs[m][c]
3795 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
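/* Worked example of the rounding above (editorial note): V2DFmode
   (16 bytes) in FLOAT_REGS with 8-byte registers needs
   (16 + 8 - 1) / 8 = 2 registers, while SFmode (4 bytes) rounds up to
   (4 + 8 - 1) / 8 = 1 register.  */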
3799 if (TARGET_E500_DOUBLE)
3800 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3802 /* Calculate the modes for which to automatically generate code using the
3803 reciprocal divide and square root instructions. In the future, possibly
3804 generate the instructions automatically even if the user did not specify
3805 -mrecip. The double-precision reciprocal sqrt estimate on older machines
3806 is not accurate enough. */
3807 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3808 if (TARGET_FRES)
3809 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3810 if (TARGET_FRE)
3811 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3812 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3813 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3814 if (VECTOR_UNIT_VSX_P (V2DFmode))
3815 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3817 if (TARGET_FRSQRTES)
3818 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3819 if (TARGET_FRSQRTE)
3820 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3821 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3822 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3823 if (VECTOR_UNIT_VSX_P (V2DFmode))
3824 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3826 if (rs6000_recip_control)
3828 if (!flag_finite_math_only)
3829 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3830 if (flag_trapping_math)
3831 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3832 if (!flag_reciprocal_math)
3833 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3834 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3836 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3837 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3838 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3840 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3841 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3842 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3844 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3845 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3846 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3848 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3849 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3850 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3852 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3853 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3854 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3856 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3857 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3858 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3860 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3861 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3862 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3864 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3865 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3866 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3870 /* Update the addr mask bits in reg_addr to help secondary reload and the
3871 legitimate address checks figure out the appropriate addressing to
3872 use. */
3873 rs6000_setup_reg_addr_masks ();
3875 if (global_init_p || TARGET_DEBUG_TARGET)
3877 if (TARGET_DEBUG_REG)
3878 rs6000_debug_reg_global ();
3880 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3881 fprintf (stderr,
3882 "SImode variable mult cost = %d\n"
3883 "SImode constant mult cost = %d\n"
3884 "SImode short constant mult cost = %d\n"
3885 "DImode multipliciation cost = %d\n"
3886 "SImode division cost = %d\n"
3887 "DImode division cost = %d\n"
3888 "Simple fp operation cost = %d\n"
3889 "DFmode multiplication cost = %d\n"
3890 "SFmode division cost = %d\n"
3891 "DFmode division cost = %d\n"
3892 "cache line size = %d\n"
3893 "l1 cache size = %d\n"
3894 "l2 cache size = %d\n"
3895 "simultaneous prefetches = %d\n"
3896 "\n",
3897 rs6000_cost->mulsi,
3898 rs6000_cost->mulsi_const,
3899 rs6000_cost->mulsi_const9,
3900 rs6000_cost->muldi,
3901 rs6000_cost->divsi,
3902 rs6000_cost->divdi,
3903 rs6000_cost->fp,
3904 rs6000_cost->dmul,
3905 rs6000_cost->sdiv,
3906 rs6000_cost->ddiv,
3907 rs6000_cost->cache_line_size,
3908 rs6000_cost->l1_cache_size,
3909 rs6000_cost->l2_cache_size,
3910 rs6000_cost->simultaneous_prefetches);
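/* (Editorial note: these dumps are normally enabled from the command line
   with the rs6000 debug switches, e.g. -mdebug=reg and -mdebug=cost.)  */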
3914 #if TARGET_MACHO
3915 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3917 static void
3918 darwin_rs6000_override_options (void)
3920 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3921 off. */
3922 rs6000_altivec_abi = 1;
3923 TARGET_ALTIVEC_VRSAVE = 1;
3924 rs6000_current_abi = ABI_DARWIN;
3926 if (DEFAULT_ABI == ABI_DARWIN
3927 && TARGET_64BIT)
3928 darwin_one_byte_bool = 1;
3930 if (TARGET_64BIT && ! TARGET_POWERPC64)
3932 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3933 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3935 if (flag_mkernel)
3937 rs6000_default_long_calls = 1;
3938 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3941 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3942 Altivec. */
3943 if (!flag_mkernel && !flag_apple_kext
3944 && TARGET_64BIT
3945 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3946 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3948 /* Unless the user (not the configurer) has explicitly overridden
3949 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4
3950 unless targeting the kernel. */
3951 if (!flag_mkernel
3952 && !flag_apple_kext
3953 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3954 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3955 && ! global_options_set.x_rs6000_cpu_index)
3957 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3960 #endif
3962 /* If not otherwise specified by a target, make 'long double' equivalent to
3963 'double'. */
3965 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3966 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3967 #endif
3969 /* Return the builtin mask of the various options that could affect which
3970 builtins are usable. In the past we used target_flags, but we've run out
3971 of bits, and some options like SPE and PAIRED are no longer in
3972 target_flags. */
3974 HOST_WIDE_INT
3975 rs6000_builtin_mask_calculate (void)
3977 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3978 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3979 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3980 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3981 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3982 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3983 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3984 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3985 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3986 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3987 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3988 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3989 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3990 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3991 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3992 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3993 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3994 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3995 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3996 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3997 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3998 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
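/* Illustrative use (a sketch, not code from this file): elsewhere the mask
   gates builtin registration roughly as

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     bool have_altivec = (mask & RS6000_BTM_ALTIVEC) != 0;

   so recomputing the mask after a target pragma or attribute re-gates
   which built-ins the front end accepts.  */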
4001 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
4002 to clobber the XER[CA] bit because clobbering that bit without telling
4003 the compiler worked just fine with versions of GCC before GCC 5, and
4004 breaking a lot of older code in ways that are hard to track down is
4005 not such a great idea. */
4007 static rtx_insn *
4008 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
4009 vec<const char *> &/*constraints*/,
4010 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
4012 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
4013 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
4014 return NULL;
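/* Example of the legacy code this protects (illustrative only): an asm
   such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   sets XER[CA] without declaring it; the unconditional clobber above keeps
   GCC from assuming a carry value survives across the asm.  */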
4017 /* Override command line options.
4019 Combine build-specific configuration information with options
4020 specified on the command line to set various state variables which
4021 influence code generation, optimization, and expansion of built-in
4022 functions. Ensure that command-line configuration preferences are
4023 compatible with each other and with the build configuration; issue
4024 warnings while adjusting configuration or error messages while
4025 rejecting configuration.
4027 Upon entry to this function:
4029 This function is called once at the beginning of
4030 compilation, and then again at the start and end of compiling
4031 each section of code that has a different configuration, as
4032 indicated, for example, by adding the
4034 __attribute__((__target__("cpu=power9")))
4036 qualifier to a function definition or, for example, by bracketing
4037 code between
4039 #pragma GCC target("altivec")
4043 #pragma GCC reset_options
4045 directives. Parameter global_init_p is true for the initial
4046 invocation, which initializes global variables, and false for all
4047 subsequent invocations.
4050 Various global state information is assumed to be valid. This
4051 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
4052 default CPU specified at build configure time, TARGET_DEFAULT,
4053 representing the default set of option flags for the default
4054 target, and global_options_set.x_rs6000_isa_flags, representing
4055 which options were requested on the command line.
4057 Upon return from this function:
4059 rs6000_isa_flags_explicit has a non-zero bit for each flag that
4060 was set by name on the command line. Additionally, if certain
4061 attributes are automatically enabled or disabled by this function
4062 in order to assure compatibility between options and
4063 configuration, the flags associated with those attributes are
4064 also set. By setting these "explicit bits", we avoid the risk
4065 that other code might accidentally overwrite these particular
4066 attributes with "default values".
4068 The various bits of rs6000_isa_flags are set to indicate the
4069 target options that have been selected for the most current
4070 compilation efforts. This has the effect of also turning on the
4071 associated TARGET_XXX values since these are macros which are
4072 generally defined to test the corresponding bit of the
4073 rs6000_isa_flags variable.
4075 The variable rs6000_builtin_mask is set to represent the target
4076 options for the most current compilation efforts, consistent with
4077 the current contents of rs6000_isa_flags. This variable controls
4078 expansion of built-in functions.
4080 Various other global variables and fields of global structures
4081 (over 50 in all) are initialized to reflect the desired options
4082 for the most current compilation efforts. */
4084 static bool
4085 rs6000_option_override_internal (bool global_init_p)
4087 bool ret = true;
4088 bool have_cpu = false;
4090 /* The default cpu requested at configure time, if any. */
4091 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
4093 HOST_WIDE_INT set_masks;
4094 HOST_WIDE_INT ignore_masks;
4095 int cpu_index;
4096 int tune_index;
4097 struct cl_target_option *main_target_opt
4098 = ((global_init_p || target_option_default_node == NULL)
4099 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4101 /* Print defaults. */
4102 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4103 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4105 /* Remember the explicit arguments. */
4106 if (global_init_p)
4107 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4109 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4110 library functions, so warn about it. The flag may be useful for
4111 performance studies from time to time though, so don't disable it
4112 entirely. */
4113 if (global_options_set.x_rs6000_alignment_flags
4114 && rs6000_alignment_flags == MASK_ALIGN_POWER
4115 && DEFAULT_ABI == ABI_DARWIN
4116 && TARGET_64BIT)
4117 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4118 " it is incompatible with the installed C and C++ libraries");
4120 /* Numerous experiments show that IRA-based loop pressure
4121 calculation works better for RTL loop invariant motion on targets
4122 with enough (>= 32) registers. It is an expensive optimization,
4123 so it is enabled only when optimizing for peak performance. */
4124 if (optimize >= 3 && global_init_p
4125 && !global_options_set.x_flag_ira_loop_pressure)
4126 flag_ira_loop_pressure = 1;
4128 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4129 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
4130 options were already specified. */
4131 if (flag_sanitize & SANITIZE_USER_ADDRESS
4132 && !global_options_set.x_flag_asynchronous_unwind_tables)
4133 flag_asynchronous_unwind_tables = 1;
4135 /* Set the pointer size. */
4136 if (TARGET_64BIT)
4138 rs6000_pmode = DImode;
4139 rs6000_pointer_size = 64;
4141 else
4143 rs6000_pmode = SImode;
4144 rs6000_pointer_size = 32;
4147 /* Some OSs don't support saving the high part of 64-bit registers on context
4148 switch. Other OSs don't support saving Altivec registers. On those OSs,
4149 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4150 if the user wants either, the user must explicitly specify them and we
4151 won't interfere with the user's specification. */
4153 set_masks = POWERPC_MASKS;
4154 #ifdef OS_MISSING_POWERPC64
4155 if (OS_MISSING_POWERPC64)
4156 set_masks &= ~OPTION_MASK_POWERPC64;
4157 #endif
4158 #ifdef OS_MISSING_ALTIVEC
4159 if (OS_MISSING_ALTIVEC)
4160 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4161 | OTHER_VSX_VECTOR_MASKS);
4162 #endif
4164 /* Don't let the processor default override flags given explicitly. */
4165 set_masks &= ~rs6000_isa_flags_explicit;
4167 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4168 the cpu in a target attribute or pragma, but did not specify a tuning
4169 option, use the cpu for the tuning option rather than the option specified
4170 with -mtune on the command line. Process a '--with-cpu' configuration
4171 request as an implicit --cpu. */
4172 if (rs6000_cpu_index >= 0)
4174 cpu_index = rs6000_cpu_index;
4175 have_cpu = true;
4177 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4179 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4180 have_cpu = true;
4182 else if (implicit_cpu)
4184 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4185 have_cpu = true;
4187 else
4189 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4190 const char *default_cpu = ((!TARGET_POWERPC64)
4191 ? "powerpc"
4192 : ((BYTES_BIG_ENDIAN)
4193 ? "powerpc64"
4194 : "powerpc64le"));
4196 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4197 have_cpu = false;
4200 gcc_assert (cpu_index >= 0);
4202 if (have_cpu)
4204 #ifndef HAVE_AS_POWER9
4205 if (processor_target_table[rs6000_cpu_index].processor
4206 == PROCESSOR_POWER9)
4208 have_cpu = false;
4209 warning (0, "will not generate power9 instructions because "
4210 "assembler lacks power9 support");
4212 #endif
4213 #ifndef HAVE_AS_POWER8
4214 if (processor_target_table[rs6000_cpu_index].processor
4215 == PROCESSOR_POWER8)
4217 have_cpu = false;
4218 warning (0, "will not generate power8 instructions because "
4219 "assembler lacks power8 support");
4221 #endif
4222 #ifndef HAVE_AS_POPCNTD
4223 if (processor_target_table[rs6000_cpu_index].processor
4224 == PROCESSOR_POWER7)
4226 have_cpu = false;
4227 warning (0, "will not generate power7 instructions because "
4228 "assembler lacks power7 support");
4230 #endif
4231 #ifndef HAVE_AS_DFP
4232 if (processor_target_table[rs6000_cpu_index].processor
4233 == PROCESSOR_POWER6)
4235 have_cpu = false;
4236 warning (0, "will not generate power6 instructions because "
4237 "assembler lacks power6 support");
4239 #endif
4240 #ifndef HAVE_AS_POPCNTB
4241 if (processor_target_table[rs6000_cpu_index].processor
4242 == PROCESSOR_POWER5)
4244 have_cpu = false;
4245 warning (0, "will not generate power5 instructions because "
4246 "assembler lacks power5 support");
4248 #endif
4250 if (!have_cpu)
4252 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4253 const char *default_cpu = (!TARGET_POWERPC64
4254 ? "powerpc"
4255 : (BYTES_BIG_ENDIAN
4256 ? "powerpc64"
4257 : "powerpc64le"));
4259 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4263 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4264 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4265 with those from the cpu, except for options that were explicitly set. If
4266 we don't have a cpu, do not override the target bits set in
4267 TARGET_DEFAULT. */
4268 if (have_cpu)
4270 rs6000_isa_flags &= ~set_masks;
4271 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4272 & set_masks);
4274 else
4276 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4277 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4278 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4279 to using rs6000_isa_flags, we need to do the initialization here.
4281 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4282 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4283 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4284 : processor_target_table[cpu_index].target_enable);
4285 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4288 if (rs6000_tune_index >= 0)
4289 tune_index = rs6000_tune_index;
4290 else if (have_cpu)
4291 rs6000_tune_index = tune_index = cpu_index;
4292 else
4294 size_t i;
4295 enum processor_type tune_proc
4296 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4298 tune_index = -1;
4299 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4300 if (processor_target_table[i].processor == tune_proc)
4302 rs6000_tune_index = tune_index = i;
4303 break;
4307 gcc_assert (tune_index >= 0);
4308 rs6000_cpu = processor_target_table[tune_index].processor;
4310 /* Pick defaults for SPE-related control flags. Do this early to make sure
4311 that the TARGET_ macros are representative ASAP. */
4313 int spe_capable_cpu =
4314 (rs6000_cpu == PROCESSOR_PPC8540
4315 || rs6000_cpu == PROCESSOR_PPC8548);
4317 if (!global_options_set.x_rs6000_spe_abi)
4318 rs6000_spe_abi = spe_capable_cpu;
4320 if (!global_options_set.x_rs6000_spe)
4321 rs6000_spe = spe_capable_cpu;
4323 if (!global_options_set.x_rs6000_float_gprs)
4324 rs6000_float_gprs =
4325 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4326 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4327 : 0);
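/* (Editorial note, an assumption worth stating: these values follow the
   -mfloat-gprs= option, where 1 selects "single" and 2 selects "double",
   matching the e500v1 (8540) single-precision-only and e500v2 (8548)
   double-precision support for floating point in GPRs.)  */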
4330 if (global_options_set.x_rs6000_spe_abi
4331 && rs6000_spe_abi
4332 && !TARGET_SPE_ABI)
4333 error ("not configured for SPE ABI");
4335 if (global_options_set.x_rs6000_spe
4336 && rs6000_spe
4337 && !TARGET_SPE)
4338 error ("not configured for SPE instruction set");
4340 if (main_target_opt != NULL
4341 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4342 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4343 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4344 error ("target attribute or pragma changes SPE ABI");
4346 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4347 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4348 || rs6000_cpu == PROCESSOR_PPCE5500)
4350 if (TARGET_ALTIVEC)
4351 error ("AltiVec not supported in this target");
4352 if (TARGET_SPE)
4353 error ("SPE not supported in this target");
4355 if (rs6000_cpu == PROCESSOR_PPCE6500)
4357 if (TARGET_SPE)
4358 error ("SPE not supported in this target");
4361 /* Disable Cell microcode if we are optimizing for the Cell
4362 and not optimizing for size. */
4363 if (rs6000_gen_cell_microcode == -1)
4364 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4365 && !optimize_size);
4367 /* If we are optimizing big endian systems for space and it's OK to
4368 use instructions that would be microcoded on the Cell, use the
4369 load/store multiple and string instructions. */
4370 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4371 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4372 | OPTION_MASK_STRING);
4374 /* Don't allow -mmultiple or -mstring on little endian systems
4375 unless the cpu is a 750, because the hardware doesn't support the
4376 instructions used in little endian mode, and they cause an alignment
4377 trap. The 750 does not cause an alignment trap (except when the
4378 target is unaligned). */
4380 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4382 if (TARGET_MULTIPLE)
4384 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4385 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4386 warning (0, "-mmultiple is not supported on little endian systems");
4389 if (TARGET_STRING)
4391 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4392 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4393 warning (0, "-mstring is not supported on little endian systems");
4397 /* If little-endian, default to -mstrict-align on older processors.
4398 Testing for htm matches power8 and later. */
4399 if (!BYTES_BIG_ENDIAN
4400 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4401 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4403 /* -maltivec={le,be} implies -maltivec. */
4404 if (rs6000_altivec_element_order != 0)
4405 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4407 /* Disallow -maltivec=le in big endian mode for now. This is not
4408 known to be useful for anyone. */
4409 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4411 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4412 rs6000_altivec_element_order = 0;
4415 /* Add some warnings for VSX. */
4416 if (TARGET_VSX)
4418 const char *msg = NULL;
4419 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4420 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4422 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4423 msg = N_("-mvsx requires hardware floating point");
4424 else
4426 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4427 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4430 else if (TARGET_PAIRED_FLOAT)
4431 msg = N_("-mvsx and -mpaired are incompatible");
4432 else if (TARGET_AVOID_XFORM > 0)
4433 msg = N_("-mvsx needs indexed addressing");
4434 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4435 & OPTION_MASK_ALTIVEC))
4437 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4438 msg = N_("-mvsx and -mno-altivec are incompatible");
4439 else
4440 msg = N_("-mno-altivec disables vsx");
4443 if (msg)
4445 warning (0, msg);
4446 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4447 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4451 /* If hard-float/altivec/vsx were explicitly turned off, don't allow
4452 the -mcpu setting to enable options that conflict. */
4453 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4454 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4455 | OPTION_MASK_ALTIVEC
4456 | OPTION_MASK_VSX)) != 0)
4457 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4458 | OPTION_MASK_DIRECT_MOVE)
4459 & ~rs6000_isa_flags_explicit);
4461 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4462 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4464 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4465 off all of the options that depend on those flags. */
4466 ignore_masks = rs6000_disable_incompatible_switches ();
4468 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4469 unless the user explicitly used the -mno-<option> to disable the code. */
4470 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4471 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4472 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4473 else if (TARGET_P9_MINMAX)
4475 if (have_cpu)
4477 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4479 /* Legacy behavior: allow -mcpu=power9 with certain
4480 capabilities explicitly disabled. */
4481 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4482 /* However, reject this automatic fix if certain
4483 capabilities required for TARGET_P9_MINMAX support
4484 have been explicitly disabled. */
4485 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4486 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4487 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4488 | OPTION_MASK_UPPER_REGS_DF))
4489 error ("-mpower9-minmax incompatible with explicitly disabled options");
4491 else
4492 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4493 "<xxx> less than power9");
4495 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4496 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4497 & rs6000_isa_flags_explicit))
4498 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4499 were explicitly cleared. */
4500 error ("-mpower9-minmax incompatible with explicitly disabled options");
4501 else
4502 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4504 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4505 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4506 else if (TARGET_VSX)
4507 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4508 else if (TARGET_POPCNTD)
4509 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4510 else if (TARGET_DFP)
4511 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4512 else if (TARGET_CMPB)
4513 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4514 else if (TARGET_FPRND)
4515 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4516 else if (TARGET_POPCNTB)
4517 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4518 else if (TARGET_ALTIVEC)
4519 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4521 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4523 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4524 error ("-mcrypto requires -maltivec");
4525 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4528 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4530 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4531 error ("-mdirect-move requires -mvsx");
4532 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4535 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4537 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4538 error ("-mpower8-vector requires -maltivec");
4539 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4542 if (TARGET_P8_VECTOR && !TARGET_VSX)
4544 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4545 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4546 error ("-mpower8-vector requires -mvsx");
4547 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4549 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4550 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4551 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4553 else
4555 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4556 not explicit. */
4557 rs6000_isa_flags |= OPTION_MASK_VSX;
4558 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4562 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4564 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4565 error ("-mvsx-timode requires -mvsx");
4566 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4569 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4571 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4572 error ("-mhard-dfp requires -mhard-float");
4573 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4576 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4577 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4578 set the individual option. */
4579 if (TARGET_UPPER_REGS > 0)
4581 if (TARGET_VSX
4582 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4584 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4585 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4587 if (TARGET_VSX
4588 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4590 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4591 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4593 if (TARGET_P8_VECTOR
4594 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4596 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4597 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4600 else if (TARGET_UPPER_REGS == 0)
4602 if (TARGET_VSX
4603 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4605 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4606 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4608 if (TARGET_VSX
4609 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4611 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4612 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4614 if (TARGET_P8_VECTOR
4615 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4617 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4618 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4622 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4624 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4625 error ("-mupper-regs-df requires -mvsx");
4626 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4629 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4631 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4632 error ("-mupper-regs-di requires -mvsx");
4633 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4636 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4638 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4639 error ("-mupper-regs-sf requires -mpower8-vector");
4640 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4643 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4644 silently turn off quad memory mode. */
4645 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4647 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4648 warning (0, N_("-mquad-memory requires 64-bit mode"));
4650 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4651 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4653 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4654 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4657 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4658 the words are reversed, but atomic operations can still be done by
4659 swapping the words. */
4660 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4662 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4663 warning (0, N_("-mquad-memory is not available in little endian mode"));
4665 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4668 /* Assume that if the user asked for normal quad memory instructions, they
4669 want the atomic versions as well, unless they explicitly told us not to
4670 use quad word atomic instructions. */
4671 if (TARGET_QUAD_MEMORY
4672 && !TARGET_QUAD_MEMORY_ATOMIC
4673 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4674 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4676 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4677 generating power8 instructions. */
4678 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4679 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4680 & OPTION_MASK_P8_FUSION);
4682 /* Setting additional fusion flags turns on base fusion. */
4683 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4685 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4687 if (TARGET_P8_FUSION_SIGN)
4688 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4690 if (TARGET_TOC_FUSION)
4691 error ("-mtoc-fusion requires -mpower8-fusion");
4693 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4695 else
4696 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4699 /* Power9 fusion is a superset of power8 fusion. */
4700 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4702 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4704 /* We prefer to not mention undocumented options in
4705 error messages. However, if users have managed to select
4706 power9-fusion without selecting power8-fusion, they
4707 already know about undocumented flags. */
4708 error ("-mpower9-fusion requires -mpower8-fusion");
4709 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4711 else
4712 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4715 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4716 generating power9 instructions. */
4717 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4718 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4719 & OPTION_MASK_P9_FUSION);
4721 /* Power8 does not fuse sign-extended loads with the addis. If we are
4722 optimizing at high levels for speed, convert a sign-extending load into a
4723 zero-extending load and an explicit sign extension. */
4724 if (TARGET_P8_FUSION
4725 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4726 && optimize_function_for_speed_p (cfun)
4727 && optimize >= 3)
4728 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
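/* Sketch of the transformation enabled here (illustrative assembly, an
   editorial example): power8 addis fusion matches zero-extending loads
   only, so instead of the sign-extending

     lha r9,sym@l(r10)

   we emit

     lhz r9,sym@l(r10)
     extsh r9,r9

   letting the addis/lhz pair fuse while extsh restores the sign.  */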
4730 /* TOC fusion requires 64-bit and medium/large code model. */
4731 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4733 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4734 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4735 warning (0, N_("-mtoc-fusion requires 64-bit"));
4738 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4740 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4741 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4742 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4745 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4746 model. */
4747 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4748 && (TARGET_CMODEL != CMODEL_SMALL)
4749 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4750 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4752 /* The ISA 3.0 vector instructions are a superset of ISA 2.07's. */
4753 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4755 /* We prefer to not mention undocumented options in
4756 error messages. However, if users have managed to select
4757 power9-vector without selecting power8-vector, they
4758 already know about undocumented flags. */
4759 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4760 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4761 error ("-mpower9-vector requires -mpower8-vector");
4762 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4764 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4765 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4766 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4768 else
4770 /* OPTION_MASK_P9_VECTOR is explicit and
4771 OPTION_MASK_P8_VECTOR is not explicit. */
4772 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4773 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4777 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4778 -mpower9-dform-vector. */
4779 if (TARGET_P9_DFORM_BOTH > 0)
4781 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4782 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4784 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4785 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4787 else if (TARGET_P9_DFORM_BOTH == 0)
4789 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4790 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4792 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4793 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4796 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4797 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4799 /* We prefer to not mention undocumented options in
4800 error messages. However, if users have managed to select
4801 power9-dform without selecting power9-vector, they
4802 already know about undocumented flags. */
4803 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4804 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4805 | OPTION_MASK_P9_DFORM_VECTOR)))
4806 error ("-mpower9-dform requires -mpower9-vector");
4807 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4809 rs6000_isa_flags
4810 &= ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4811 rs6000_isa_flags_explicit
4812 |= (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4814 else
4816 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4817 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4818 may be explicit. */
4819 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4820 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4824 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4825 && !TARGET_DIRECT_MOVE)
4827 /* We prefer to not mention undocumented options in
4828 error messages. However, if users have managed to select
4829 power9-dform without selecting direct-move, they
4830 already know about undocumented flags. */
4831 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4832 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)
4833 || (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)
4834 || (TARGET_P9_DFORM_BOTH == 1)))
4835 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4836 " require -mdirect-move");
4837 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4839 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4840 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4842 else
4844 rs6000_isa_flags
4845 &= ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4846 rs6000_isa_flags_explicit
4847 |= (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4851 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4853 /* We prefer to not mention undocumented options in
4854 error messages. However, if users have managed to select
4855 power9-dform without selecting upper-regs-df, they
4856 already know about undocumented flags. */
4857 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4858 error ("-mpower9-dform requires -mupper-regs-df");
4859 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4862 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4864 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4865 error ("-mpower9-dform requires -mupper-regs-sf");
4866 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4869 /* Enable LRA by default. */
4870 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4871 rs6000_isa_flags |= OPTION_MASK_LRA;
4873 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4874 but do show up with -mno-lra. Given that -mlra will become the default
4875 once PR 69847 is fixed, turn off the problematic options by default if
4876 -mno-lra was used, and warn if the user explicitly asked for them.
4878 Enable -mpower9-dform-vector by default if LRA and the other power9
4879 options are enabled. Enable -mvsx-timode by default if LRA and VSX are. */
4880 if (!TARGET_LRA)
4882 if (TARGET_VSX_TIMODE)
4884 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4885 warning (0, "-mvsx-timode might need -mlra");
4887 else
4888 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4892 else
4894 if (TARGET_VSX && !TARGET_VSX_TIMODE
4895 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4896 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4899 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4900 support. If we only have ISA 2.06 support and the user did not specify
4901 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4902 but we don't enable the full vectorization support. */
4903 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4904 TARGET_ALLOW_MOVMISALIGN = 1;
4906 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4908 if (TARGET_ALLOW_MOVMISALIGN > 0
4909 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4910 error ("-mallow-movmisalign requires -mvsx");
4912 TARGET_ALLOW_MOVMISALIGN = 0;
4915 /* Determine when unaligned vector accesses are permitted, and when
4916 they are preferred over masked Altivec loads. Note that if
4917 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4918 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4919 not true. */
4920 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4922 if (!TARGET_VSX)
4924 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4925 error ("-mefficient-unaligned-vsx requires -mvsx");
4927 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4930 else if (!TARGET_ALLOW_MOVMISALIGN)
4932 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4933 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4935 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4939 /* Check whether we should allow small integers into VSX registers. We
4940 require direct move to prevent the register allocator from having to
4941 move variables through memory. SImode can be used on ISA 2.07,
4942 while HImode and QImode require ISA 3.0. */
4943 if (TARGET_VSX_SMALL_INTEGER
4944 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4946 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4947 error ("-mvsx-small-integer requires -mpower8-vector, "
4948 "-mupper-regs-di, and -mdirect-move");
4950 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4953 /* Set long double size before the IEEE 128-bit tests. */
4954 if (!global_options_set.x_rs6000_long_double_type_size)
4956 if (main_target_opt != NULL
4957 && (main_target_opt->x_rs6000_long_double_type_size
4958 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4959 error ("target attribute or pragma changes long double size");
4960 else
4961 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4964 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4965 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4966 pick up this default. */
4967 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4968 if (!global_options_set.x_rs6000_ieeequad)
4969 rs6000_ieeequad = 1;
4970 #endif
4972 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4973 systems, but don't enable the __float128 keyword. */
4974 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4975 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4976 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4977 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4979 /* IEEE 128-bit floating point requires VSX support. */
4980 if (!TARGET_VSX)
4982 if (TARGET_FLOAT128_KEYWORD)
4984 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4985 error ("-mfloat128 requires VSX support");
4987 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4988 | OPTION_MASK_FLOAT128_KEYWORD
4989 | OPTION_MASK_FLOAT128_HW);
4992 else if (TARGET_FLOAT128_TYPE)
4994 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4995 error ("-mfloat128-type requires VSX support");
4997 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4998 | OPTION_MASK_FLOAT128_KEYWORD
4999 | OPTION_MASK_FLOAT128_HW);
5003 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
5004 128-bit floating point support to be enabled. */
5005 if (!TARGET_FLOAT128_TYPE)
5007 if (TARGET_FLOAT128_KEYWORD)
5009 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
5011 error ("-mfloat128 requires -mfloat128-type");
5012 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5013 | OPTION_MASK_FLOAT128_KEYWORD
5014 | OPTION_MASK_FLOAT128_HW);
5016 else
5017 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
5020 if (TARGET_FLOAT128_HW)
5022 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5024 error ("-mfloat128-hardware requires -mfloat128-type");
5025 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5027 else
5028 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5029 | OPTION_MASK_FLOAT128_KEYWORD
5030 | OPTION_MASK_FLOAT128_HW);
5034 /* If we have -mfloat128-type and full ISA 3.0 support, enable
5035 -mfloat128-hardware by default. However, don't enable the __float128
5036 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
5037 -mfloat128 option as well if it was not already set. */
5038 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
5039 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
5040 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
5041 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
5043 if (TARGET_FLOAT128_HW
5044 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
5046 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5047 error ("-mfloat128-hardware requires full ISA 3.0 support");
5049 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5052 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
5054 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5055 error ("-mfloat128-hardware requires -m64");
5057 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5060 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
5061 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
5062 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
5063 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
5065 /* Print the options after updating the defaults. */
5066 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5067 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
5069 /* E500mc does "better" if we inline more aggressively. Respect the
5070 user's opinion, though. */
5071 if (rs6000_block_move_inline_limit == 0
5072 && (rs6000_cpu == PROCESSOR_PPCE500MC
5073 || rs6000_cpu == PROCESSOR_PPCE500MC64
5074 || rs6000_cpu == PROCESSOR_PPCE5500
5075 || rs6000_cpu == PROCESSOR_PPCE6500))
5076 rs6000_block_move_inline_limit = 128;
5078 /* store_one_arg depends on expand_block_move to handle at least the
5079 size of reg_parm_stack_space. */
5080 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
5081 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
5083 if (global_init_p)
5085 /* If the appropriate debug option is enabled, replace the target hooks
5086 with debug versions that call the real version and then print
5087 debugging information. */
5088 if (TARGET_DEBUG_COST)
5090 targetm.rtx_costs = rs6000_debug_rtx_costs;
5091 targetm.address_cost = rs6000_debug_address_cost;
5092 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
5095 if (TARGET_DEBUG_ADDR)
5097 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
5098 targetm.legitimize_address = rs6000_debug_legitimize_address;
5099 rs6000_secondary_reload_class_ptr
5100 = rs6000_debug_secondary_reload_class;
5101 targetm.secondary_memory_needed
5102 = rs6000_debug_secondary_memory_needed;
5103 targetm.can_change_mode_class
5104 = rs6000_debug_can_change_mode_class;
5105 rs6000_preferred_reload_class_ptr
5106 = rs6000_debug_preferred_reload_class;
5107 rs6000_legitimize_reload_address_ptr
5108 = rs6000_debug_legitimize_reload_address;
5109 rs6000_mode_dependent_address_ptr
5110 = rs6000_debug_mode_dependent_address;
5113 if (rs6000_veclibabi_name)
5115 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
5116 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
5117 else
5119 error ("unknown vectorization library ABI type (%s) for "
5120 "-mveclibabi= switch", rs6000_veclibabi_name);
5121 ret = false;
5126 /* Silently disable VSX and Altivec if the user switched cpus to power7 via a
5127 target attribute or pragma (which automatically enables both options),
5128 unless the AltiVec ABI was set. That ABI is on by default for 64-bit, but
5129 not for 32-bit. */
5130 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5131 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
5132 | OPTION_MASK_FLOAT128_TYPE
5133 | OPTION_MASK_FLOAT128_KEYWORD)
5134 & ~rs6000_isa_flags_explicit);
5136 /* Enable Altivec ABI for AIX -maltivec. */
5137 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
5139 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5140 error ("target attribute or pragma changes AltiVec ABI");
5141 else
5142 rs6000_altivec_abi = 1;
5145 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5146 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5147 be explicitly overridden in either case. */
5148 if (TARGET_ELF)
5150 if (!global_options_set.x_rs6000_altivec_abi
5151 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
5153 if (main_target_opt != NULL &&
5154 !main_target_opt->x_rs6000_altivec_abi)
5155 error ("target attribute or pragma changes AltiVec ABI");
5156 else
5157 rs6000_altivec_abi = 1;
5161 /* Set the Darwin64 ABI as default for 64-bit Darwin.
5162 So far, the only darwin64 targets are also Mach-O. */
5163 if (TARGET_MACHO
5164 && DEFAULT_ABI == ABI_DARWIN
5165 && TARGET_64BIT)
5167 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
5168 error ("target attribute or pragma changes darwin64 ABI");
5169 else
5171 rs6000_darwin64_abi = 1;
5172 /* Default to natural alignment, for better performance. */
5173 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
5177 /* Place FP constants in the constant pool instead of TOC
5178 if section anchors enabled. */
5179 if (flag_section_anchors
5180 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
5181 TARGET_NO_FP_IN_TOC = 1;
5183 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5184 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
5186 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5187 SUBTARGET_OVERRIDE_OPTIONS;
5188 #endif
5189 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5190 SUBSUBTARGET_OVERRIDE_OPTIONS;
5191 #endif
5192 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5193 SUB3TARGET_OVERRIDE_OPTIONS;
5194 #endif
5196 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5197 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5199 /* For the E500 family of cores, reset the single/double FP flags to let us
5200 check that they remain constant across attributes or pragmas. Also,
5201 clear any possible request for string instructions, which are not supported
5202 and which we might have silently enabled above for -Os.
5204 For other families, clear ISEL in case it was set implicitly. */
5207 switch (rs6000_cpu)
5209 case PROCESSOR_PPC8540:
5210 case PROCESSOR_PPC8548:
5211 case PROCESSOR_PPCE500MC:
5212 case PROCESSOR_PPCE500MC64:
5213 case PROCESSOR_PPCE5500:
5214 case PROCESSOR_PPCE6500:
5216 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5217 rs6000_double_float = TARGET_E500_DOUBLE;
5219 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5221 break;
5223 default:
5225 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5226 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5228 break;
5231 if (main_target_opt)
5233 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5234 error ("target attribute or pragma changes single precision floating "
5235 "point");
5236 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5237 error ("target attribute or pragma changes double precision floating "
5238 "point");
5241 /* Detect invalid option combinations with E500. */
5242 CHECK_E500_OPTIONS;
5244 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5245 && rs6000_cpu != PROCESSOR_POWER5
5246 && rs6000_cpu != PROCESSOR_POWER6
5247 && rs6000_cpu != PROCESSOR_POWER7
5248 && rs6000_cpu != PROCESSOR_POWER8
5249 && rs6000_cpu != PROCESSOR_POWER9
5250 && rs6000_cpu != PROCESSOR_PPCA2
5251 && rs6000_cpu != PROCESSOR_CELL
5252 && rs6000_cpu != PROCESSOR_PPC476);
5253 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5254 || rs6000_cpu == PROCESSOR_POWER5
5255 || rs6000_cpu == PROCESSOR_POWER7
5256 || rs6000_cpu == PROCESSOR_POWER8);
5257 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5258 || rs6000_cpu == PROCESSOR_POWER5
5259 || rs6000_cpu == PROCESSOR_POWER6
5260 || rs6000_cpu == PROCESSOR_POWER7
5261 || rs6000_cpu == PROCESSOR_POWER8
5262 || rs6000_cpu == PROCESSOR_POWER9
5263 || rs6000_cpu == PROCESSOR_PPCE500MC
5264 || rs6000_cpu == PROCESSOR_PPCE500MC64
5265 || rs6000_cpu == PROCESSOR_PPCE5500
5266 || rs6000_cpu == PROCESSOR_PPCE6500);
5268 /* Allow debug switches to override the above settings. These are set to -1
5269 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5270 if (TARGET_ALWAYS_HINT >= 0)
5271 rs6000_always_hint = TARGET_ALWAYS_HINT;
5273 if (TARGET_SCHED_GROUPS >= 0)
5274 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5276 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5277 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5279 rs6000_sched_restricted_insns_priority
5280 = (rs6000_sched_groups ? 1 : 0);
5282 /* Handle -msched-costly-dep option. */
5283 rs6000_sched_costly_dep
5284 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5286 if (rs6000_sched_costly_dep_str)
5288 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5289 rs6000_sched_costly_dep = no_dep_costly;
5290 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5291 rs6000_sched_costly_dep = all_deps_costly;
5292 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5293 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5294 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5295 rs6000_sched_costly_dep = store_to_load_dep_costly;
5296 else
5297 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5298 atoi (rs6000_sched_costly_dep_str));
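/* Illustrative spellings accepted by the parsing above:
-msched-costly-dep=no, =all, =true_store_to_load, =store_to_load, or a
plain number such as -msched-costly-dep=12, which reaches the atoi ()
fallback; per the option's documentation, a dependence whose cost is at
least that number is then considered costly. */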
5301 /* Handle -minsert-sched-nops option. */
5302 rs6000_sched_insert_nops
5303 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5305 if (rs6000_sched_insert_nops_str)
5307 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5308 rs6000_sched_insert_nops = sched_finish_none;
5309 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5310 rs6000_sched_insert_nops = sched_finish_pad_groups;
5311 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5312 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5313 else
5314 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5315 atoi (rs6000_sched_insert_nops_str));
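/* Likewise for the nop insertion scheme: -minsert-sched-nops=no, =pad and
=regroup_exact select the strategies named above, and a plain number
(e.g. -minsert-sched-nops=3, per the option's documentation) is handed to
the atoi () fallback. */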
5318 /* Handle stack protector */
5319 if (!global_options_set.x_rs6000_stack_protector_guard)
5320 #ifdef TARGET_THREAD_SSP_OFFSET
5321 rs6000_stack_protector_guard = SSP_TLS;
5322 #else
5323 rs6000_stack_protector_guard = SSP_GLOBAL;
5324 #endif
5326 #ifdef TARGET_THREAD_SSP_OFFSET
5327 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5328 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5329 #endif
5331 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5333 char *endp;
5334 const char *str = rs6000_stack_protector_guard_offset_str;
5336 errno = 0;
5337 long offset = strtol (str, &endp, 0);
5338 if (!*str || *endp || errno)
5339 error ("%qs is not a valid number "
5340 "in -mstack-protector-guard-offset=", str);
5342 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5343 || (TARGET_64BIT && (offset & 3)))
5344 error ("%qs is not a valid offset "
5345 "in -mstack-protector-guard-offset=", str);
5347 rs6000_stack_protector_guard_offset = offset;
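/* A hypothetical explicit configuration, exercising both checks above on a
64-bit target (the offset must fit in a signed 16-bit value and be a
multiple of 4 there):

gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
-mstack-protector-guard-offset=0x10 ...

When these options are left unset, the TARGET_THREAD_SSP_OFFSET defaults
above apply instead. */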
5350 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5352 const char *str = rs6000_stack_protector_guard_reg_str;
5353 int reg = decode_reg_name (str);
5355 if (!IN_RANGE (reg, 1, 31))
5356 error ("%qs is not a valid base register "
5357 "in -mstack-protector-guard-reg=", str);
5359 rs6000_stack_protector_guard_reg = reg;
5362 if (rs6000_stack_protector_guard == SSP_TLS
5363 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5364 error ("-mstack-protector-guard=tls needs a valid base register");
5366 if (global_init_p)
5368 #ifdef TARGET_REGNAMES
5369 /* If the user desires alternate register names, copy in the
5370 alternate names now. */
5371 if (TARGET_REGNAMES)
5372 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5373 #endif
5375 /* Set aix_struct_return last, after the ABI is determined.
5376 If -maix-struct-return or -msvr4-struct-return was explicitly
5377 used, don't override with the ABI default. */
5378 if (!global_options_set.x_aix_struct_return)
5379 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5381 #if 0
5382 /* IBM XL compiler defaults to unsigned bitfields. */
5383 if (TARGET_XL_COMPAT)
5384 flag_signed_bitfields = 0;
5385 #endif
5387 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5388 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5390 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5392 /* We can only guarantee the availability of DI pseudo-ops when
5393 assembling for 64-bit targets. */
5394 if (!TARGET_64BIT)
5396 targetm.asm_out.aligned_op.di = NULL;
5397 targetm.asm_out.unaligned_op.di = NULL;
5401 /* Set branch target alignment, if not optimizing for size. */
5402 if (!optimize_size)
5404 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
5405 8-byte aligned to avoid misprediction by the branch predictor. */
5406 if (rs6000_cpu == PROCESSOR_TITAN
5407 || rs6000_cpu == PROCESSOR_CELL)
5409 if (align_functions <= 0)
5410 align_functions = 8;
5411 if (align_jumps <= 0)
5412 align_jumps = 8;
5413 if (align_loops <= 0)
5414 align_loops = 8;
5416 if (rs6000_align_branch_targets)
5418 if (align_functions <= 0)
5419 align_functions = 16;
5420 if (align_jumps <= 0)
5421 align_jumps = 16;
5422 if (align_loops <= 0)
5424 can_override_loop_align = 1;
5425 align_loops = 16;
5428 if (align_jumps_max_skip <= 0)
5429 align_jumps_max_skip = 15;
5430 if (align_loops_max_skip <= 0)
5431 align_loops_max_skip = 15;
5434 /* Arrange to save and restore machine status around nested functions. */
5435 init_machine_status = rs6000_init_machine_status;
5437 /* We should always be splitting complex arguments, but we can't break
5438 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5439 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5440 targetm.calls.split_complex_arg = NULL;
5442 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5443 if (DEFAULT_ABI == ABI_AIX)
5444 targetm.calls.custom_function_descriptors = 0;
5447 /* Initialize rs6000_cost with the appropriate target costs. */
5448 if (optimize_size)
5449 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5450 else
5451 switch (rs6000_cpu)
5453 case PROCESSOR_RS64A:
5454 rs6000_cost = &rs64a_cost;
5455 break;
5457 case PROCESSOR_MPCCORE:
5458 rs6000_cost = &mpccore_cost;
5459 break;
5461 case PROCESSOR_PPC403:
5462 rs6000_cost = &ppc403_cost;
5463 break;
5465 case PROCESSOR_PPC405:
5466 rs6000_cost = &ppc405_cost;
5467 break;
5469 case PROCESSOR_PPC440:
5470 rs6000_cost = &ppc440_cost;
5471 break;
5473 case PROCESSOR_PPC476:
5474 rs6000_cost = &ppc476_cost;
5475 break;
5477 case PROCESSOR_PPC601:
5478 rs6000_cost = &ppc601_cost;
5479 break;
5481 case PROCESSOR_PPC603:
5482 rs6000_cost = &ppc603_cost;
5483 break;
5485 case PROCESSOR_PPC604:
5486 rs6000_cost = &ppc604_cost;
5487 break;
5489 case PROCESSOR_PPC604e:
5490 rs6000_cost = &ppc604e_cost;
5491 break;
5493 case PROCESSOR_PPC620:
5494 rs6000_cost = &ppc620_cost;
5495 break;
5497 case PROCESSOR_PPC630:
5498 rs6000_cost = &ppc630_cost;
5499 break;
5501 case PROCESSOR_CELL:
5502 rs6000_cost = &ppccell_cost;
5503 break;
5505 case PROCESSOR_PPC750:
5506 case PROCESSOR_PPC7400:
5507 rs6000_cost = &ppc750_cost;
5508 break;
5510 case PROCESSOR_PPC7450:
5511 rs6000_cost = &ppc7450_cost;
5512 break;
5514 case PROCESSOR_PPC8540:
5515 case PROCESSOR_PPC8548:
5516 rs6000_cost = &ppc8540_cost;
5517 break;
5519 case PROCESSOR_PPCE300C2:
5520 case PROCESSOR_PPCE300C3:
5521 rs6000_cost = &ppce300c2c3_cost;
5522 break;
5524 case PROCESSOR_PPCE500MC:
5525 rs6000_cost = &ppce500mc_cost;
5526 break;
5528 case PROCESSOR_PPCE500MC64:
5529 rs6000_cost = &ppce500mc64_cost;
5530 break;
5532 case PROCESSOR_PPCE5500:
5533 rs6000_cost = &ppce5500_cost;
5534 break;
5536 case PROCESSOR_PPCE6500:
5537 rs6000_cost = &ppce6500_cost;
5538 break;
5540 case PROCESSOR_TITAN:
5541 rs6000_cost = &titan_cost;
5542 break;
5544 case PROCESSOR_POWER4:
5545 case PROCESSOR_POWER5:
5546 rs6000_cost = &power4_cost;
5547 break;
5549 case PROCESSOR_POWER6:
5550 rs6000_cost = &power6_cost;
5551 break;
5553 case PROCESSOR_POWER7:
5554 rs6000_cost = &power7_cost;
5555 break;
5557 case PROCESSOR_POWER8:
5558 rs6000_cost = &power8_cost;
5559 break;
5561 case PROCESSOR_POWER9:
5562 rs6000_cost = &power9_cost;
5563 break;
5565 case PROCESSOR_PPCA2:
5566 rs6000_cost = &ppca2_cost;
5567 break;
5569 default:
5570 gcc_unreachable ();
5573 if (global_init_p)
5575 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5576 rs6000_cost->simultaneous_prefetches,
5577 global_options.x_param_values,
5578 global_options_set.x_param_values);
5579 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5580 global_options.x_param_values,
5581 global_options_set.x_param_values);
5582 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5583 rs6000_cost->cache_line_size,
5584 global_options.x_param_values,
5585 global_options_set.x_param_values);
5586 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5587 global_options.x_param_values,
5588 global_options_set.x_param_values);
5590 /* Increase loop peeling limits based on performance analysis. */
5591 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5592 global_options.x_param_values,
5593 global_options_set.x_param_values);
5594 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5595 global_options.x_param_values,
5596 global_options_set.x_param_values);
5598 /* Use the 'model' -fsched-pressure algorithm by default. */
5599 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5600 SCHED_PRESSURE_MODEL,
5601 global_options.x_param_values,
5602 global_options_set.x_param_values);
5604 /* If using typedef char *va_list, signal that
5605 __builtin_va_start (&ap, 0) can be optimized to
5606 ap = __builtin_next_arg (0). */
5607 if (DEFAULT_ABI != ABI_V4)
5608 targetm.expand_builtin_va_start = NULL;
5611 /* Set up single/double float flags.
5612 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5613 then set both flags. */
5614 if (TARGET_HARD_FLOAT && TARGET_FPRS
5615 && rs6000_single_float == 0 && rs6000_double_float == 0)
5616 rs6000_single_float = rs6000_double_float = 1;
5618 /* If not explicitly specified via option, decide whether to generate indexed
5619 load/store instructions. A value of -1 indicates that the
5620 initial value of this variable has not been overwritten. During
5621 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5622 if (TARGET_AVOID_XFORM == -1)
5623 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5624 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5625 need indexed accesses and the type used is the scalar type of the element
5626 being loaded or stored. */
5627 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5628 && !TARGET_ALTIVEC);
5630 /* Set the -mrecip options. */
5631 if (rs6000_recip_name)
5633 char *p = ASTRDUP (rs6000_recip_name);
5634 char *q;
5635 unsigned int mask, i;
5636 bool invert;
5638 while ((q = strtok (p, ",")) != NULL)
5640 p = NULL;
5641 if (*q == '!')
5643 invert = true;
5644 q++;
5646 else
5647 invert = false;
5649 if (!strcmp (q, "default"))
5650 mask = ((TARGET_RECIP_PRECISION)
5651 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5652 else
5654 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5655 if (!strcmp (q, recip_options[i].string))
5657 mask = recip_options[i].mask;
5658 break;
5661 if (i == ARRAY_SIZE (recip_options))
5663 error ("unknown option for -mrecip=%s", q);
5664 invert = false;
5665 mask = 0;
5666 ret = false;
5670 if (invert)
5671 rs6000_recip_control &= ~mask;
5672 else
5673 rs6000_recip_control |= mask;
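/* For instance, -mrecip=default,!rsqrtd would first set the default
estimate mask for the current precision and then clear the bit for
double-precision reciprocal square root (assuming the recip_options[]
table spells that entry "rsqrtd"). */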
5677 /* Set the builtin mask of the various options used that could affect which
5678 builtins were used. In the past we used target_flags, but we've run out
5679 of bits, and some options like SPE and PAIRED are no longer in
5680 target_flags. */
5681 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5682 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5683 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5684 rs6000_builtin_mask);
5686 /* Initialize all of the registers. */
5687 rs6000_init_hard_regno_mode_ok (global_init_p);
5689 /* Save the initial options in case the user uses function-specific options. */
5690 if (global_init_p)
5691 target_option_default_node = target_option_current_node
5692 = build_target_option_node (&global_options);
5694 /* If not explicitly specified via option, decide whether to generate the
5695 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5696 if (TARGET_LINK_STACK == -1)
5697 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5699 return ret;
5702 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5703 define the target cpu type. */
5705 static void
5706 rs6000_option_override (void)
5708 (void) rs6000_option_override_internal (true);
5712 /* Implement targetm.vectorize.builtin_mask_for_load. */
5713 static tree
5714 rs6000_builtin_mask_for_load (void)
5716 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5717 if ((TARGET_ALTIVEC && !TARGET_VSX)
5718 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5719 return altivec_builtin_mask_for_load;
5720 else
5721 return 0;
5724 /* Implement LOOP_ALIGN. */
5725 int
5726 rs6000_loop_align (rtx label)
5728 basic_block bb;
5729 int ninsns;
5731 /* Don't override loop alignment if -falign-loops was specified. */
5732 if (!can_override_loop_align)
5733 return align_loops_log;
5735 bb = BLOCK_FOR_INSN (label);
5736 ninsns = num_loop_insns(bb->loop_father);
5738 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5739 if (ninsns > 4 && ninsns <= 8
5740 && (rs6000_cpu == PROCESSOR_POWER4
5741 || rs6000_cpu == PROCESSOR_POWER5
5742 || rs6000_cpu == PROCESSOR_POWER6
5743 || rs6000_cpu == PROCESSOR_POWER7
5744 || rs6000_cpu == PROCESSOR_POWER8
5745 || rs6000_cpu == PROCESSOR_POWER9))
5746 return 5;
5747 else
5748 return align_loops_log;
5751 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5752 static int
5753 rs6000_loop_align_max_skip (rtx_insn *label)
5755 return (1 << rs6000_loop_align (label)) - 1;
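/* E.g. when rs6000_loop_align returns 5 (a 32-byte boundary), up to
(1 << 5) - 1 = 31 bytes of padding may be skipped. */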
5758 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5759 after applying N iterations. This routine does not determine how many
5760 iterations are required to reach the desired alignment. */
5762 static bool
5763 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5765 if (is_packed)
5766 return false;
5768 if (TARGET_32BIT)
5770 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5771 return true;
5773 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5774 return true;
5776 return false;
5778 else
5780 if (TARGET_MACHO)
5781 return false;
5783 /* Assume that all other types are naturally aligned. CHECKME! */
5784 return true;
5788 /* Return true if the vector misalignment factor is supported by the
5789 target. */
5790 static bool
5791 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5792 const_tree type,
5793 int misalignment,
5794 bool is_packed)
5796 if (TARGET_VSX)
5798 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5799 return true;
5801 /* Return false if the movmisalign pattern is not supported for this mode. */
5802 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5803 return false;
5805 if (misalignment == -1)
5807 /* Misalignment factor is unknown at compile time but we know
5808 it's word aligned. */
5809 if (rs6000_vector_alignment_reachable (type, is_packed))
5811 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5813 if (element_size == 64 || element_size == 32)
5814 return true;
5817 return false;
5820 /* VSX supports word-aligned vectors. */
5821 if (misalignment % 4 == 0)
5822 return true;
5824 return false;
5827 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5828 static int
5829 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5830 tree vectype, int misalign)
5832 unsigned elements;
5833 tree elem_type;
5835 switch (type_of_cost)
5837 case scalar_stmt:
5838 case scalar_load:
5839 case scalar_store:
5840 case vector_stmt:
5841 case vector_load:
5842 case vector_store:
5843 case vec_to_scalar:
5844 case scalar_to_vec:
5845 case cond_branch_not_taken:
5846 return 1;
5848 case vec_perm:
5849 if (TARGET_VSX)
5850 return 3;
5851 else
5852 return 1;
5854 case vec_promote_demote:
5855 if (TARGET_VSX)
5856 return 4;
5857 else
5858 return 1;
5860 case cond_branch_taken:
5861 return 3;
5863 case unaligned_load:
5864 case vector_gather_load:
5865 if (TARGET_P9_VECTOR)
5866 return 3;
5868 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5869 return 1;
5871 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5873 elements = TYPE_VECTOR_SUBPARTS (vectype);
5874 if (elements == 2)
5875 /* Double word aligned. */
5876 return 2;
5878 if (elements == 4)
5880 switch (misalign)
5882 case 8:
5883 /* Double word aligned. */
5884 return 2;
5886 case -1:
5887 /* Unknown misalignment. */
5888 case 4:
5889 case 12:
5890 /* Word aligned. */
5891 return 22;
5893 default:
5894 gcc_unreachable ();
5899 if (TARGET_ALTIVEC)
5900 /* Misaligned loads are not supported. */
5901 gcc_unreachable ();
5903 return 2;
5905 case unaligned_store:
5906 case vector_scatter_store:
5907 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5908 return 1;
5910 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5912 elements = TYPE_VECTOR_SUBPARTS (vectype);
5913 if (elements == 2)
5914 /* Double word aligned. */
5915 return 2;
5917 if (elements == 4)
5919 switch (misalign)
5921 case 8:
5922 /* Double word aligned. */
5923 return 2;
5925 case -1:
5926 /* Unknown misalignment. */
5927 case 4:
5928 case 12:
5929 /* Word aligned. */
5930 return 23;
5932 default:
5933 gcc_unreachable ();
5938 if (TARGET_ALTIVEC)
5939 /* Misaligned stores are not supported. */
5940 gcc_unreachable ();
5942 return 2;
5944 case vec_construct:
5945 /* This is a rough approximation assuming non-constant elements
5946 constructed into a vector via element insertion. FIXME:
5947 vec_construct is not granular enough for uniformly good
5948 decisions. If the initialization is a splat, this is
5949 cheaper than we estimate. Improve this someday. */
5950 elem_type = TREE_TYPE (vectype);
5951 /* 32-bit vectors loaded into registers are stored as double
5952 precision, so we need 2 permutes, 2 converts, and 1 merge
5953 to construct a vector of short floats from them. */
5954 if (SCALAR_FLOAT_TYPE_P (elem_type)
5955 && TYPE_PRECISION (elem_type) == 32)
5956 return 5;
5957 /* On POWER9, integer vector types are built up in GPRs and then
5958 use a direct move (2 cycles). For POWER8 this is even worse,
5959 as we need two direct moves and a merge, and the direct moves
5960 are five cycles. */
5961 else if (INTEGRAL_TYPE_P (elem_type))
5963 if (TARGET_P9_VECTOR)
5964 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5965 else
5966 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5968 else
5969 /* V2DFmode doesn't need a direct move. */
5970 return 2;
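/* Worked example of the above: constructing a V4SI from four variable
elements costs 4 - 1 + 2 = 5 units on POWER9 but 4 - 1 + 11 = 14 on
POWER8, reflecting the slower direct moves described above. */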
5972 default:
5973 gcc_unreachable ();
5977 /* Implement targetm.vectorize.preferred_simd_mode. */
5979 static machine_mode
5980 rs6000_preferred_simd_mode (scalar_mode mode)
5982 if (TARGET_VSX)
5983 switch (mode)
5985 case E_DFmode:
5986 return V2DFmode;
5987 default:;
5989 if (TARGET_ALTIVEC || TARGET_VSX)
5990 switch (mode)
5992 case E_SFmode:
5993 return V4SFmode;
5994 case E_TImode:
5995 return V1TImode;
5996 case E_DImode:
5997 return V2DImode;
5998 case E_SImode:
5999 return V4SImode;
6000 case E_HImode:
6001 return V8HImode;
6002 case E_QImode:
6003 return V16QImode;
6004 default:;
6006 if (TARGET_SPE)
6007 switch (mode)
6009 case E_SFmode:
6010 return V2SFmode;
6011 case E_SImode:
6012 return V2SImode;
6013 default:;
6015 if (TARGET_PAIRED_FLOAT
6016 && mode == SFmode)
6017 return V2SFmode;
6018 return word_mode;
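/* Thus a loop over "double" prefers V2DFmode when VSX is available, a loop
over "float" prefers V4SFmode on any AltiVec/VSX target, and on SPE
"float" falls back to the 64-bit wide V2SFmode. */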
6021 typedef struct _rs6000_cost_data
6023 struct loop *loop_info;
6024 unsigned cost[3];
6025 } rs6000_cost_data;
6027 /* Test for likely overcommitment of vector hardware resources. If a
6028 loop iteration is relatively large, and too large a percentage of
6029 instructions in the loop are vectorized, the cost model may not
6030 adequately reflect delays from unavailable vector resources.
6031 Penalize the loop body cost for this case. */
6033 static void
6034 rs6000_density_test (rs6000_cost_data *data)
6036 const int DENSITY_PCT_THRESHOLD = 85;
6037 const int DENSITY_SIZE_THRESHOLD = 70;
6038 const int DENSITY_PENALTY = 10;
6039 struct loop *loop = data->loop_info;
6040 basic_block *bbs = get_loop_body (loop);
6041 int nbbs = loop->num_nodes;
6042 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
6043 int i, density_pct;
6045 for (i = 0; i < nbbs; i++)
6047 basic_block bb = bbs[i];
6048 gimple_stmt_iterator gsi;
6050 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6052 gimple *stmt = gsi_stmt (gsi);
6053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6055 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6056 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
6057 not_vec_cost++;
6061 free (bbs);
6062 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
6064 if (density_pct > DENSITY_PCT_THRESHOLD
6065 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
6067 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
6068 if (dump_enabled_p ())
6069 dump_printf_loc (MSG_NOTE, vect_location,
6070 "density %d%%, cost %d exceeds threshold, penalizing "
6071 "loop body cost by %d%%", density_pct,
6072 vec_cost + not_vec_cost, DENSITY_PENALTY);
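/* Worked example: vec_cost == 90 and not_vec_cost == 10 give a density of
90% and a size of 100; both thresholds above are exceeded, so the body
cost is scaled to 90 * 110 / 100 == 99. */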
6076 /* Implement targetm.vectorize.init_cost. */
6078 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
6079 instruction is needed by the vectorization. */
6080 static bool rs6000_vect_nonmem;
6082 static void *
6083 rs6000_init_cost (struct loop *loop_info)
6085 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
6086 data->loop_info = loop_info;
6087 data->cost[vect_prologue] = 0;
6088 data->cost[vect_body] = 0;
6089 data->cost[vect_epilogue] = 0;
6090 rs6000_vect_nonmem = false;
6091 return data;
6094 /* Implement targetm.vectorize.add_stmt_cost. */
6096 static unsigned
6097 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6098 struct _stmt_vec_info *stmt_info, int misalign,
6099 enum vect_cost_model_location where)
6101 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6102 unsigned retval = 0;
6104 if (flag_vect_cost_model)
6106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6107 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
6108 misalign);
6109 /* Statements in an inner loop relative to the loop being
6110 vectorized are weighted more heavily. The value here is
6111 arbitrary and could potentially be improved with analysis. */
6112 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6113 count *= 50; /* FIXME. */
6115 retval = (unsigned) (count * stmt_cost);
6116 cost_data->cost[where] += retval;
6118 /* Check whether we're doing something other than just a copy loop.
6119 Not all such loops may be profitably vectorized; see
6120 rs6000_finish_cost. */
6121 if ((kind == vec_to_scalar || kind == vec_perm
6122 || kind == vec_promote_demote || kind == vec_construct
6123 || kind == scalar_to_vec)
6124 || (where == vect_body && kind == vector_stmt))
6125 rs6000_vect_nonmem = true;
6128 return retval;
6131 /* Implement targetm.vectorize.finish_cost. */
6133 static void
6134 rs6000_finish_cost (void *data, unsigned *prologue_cost,
6135 unsigned *body_cost, unsigned *epilogue_cost)
6137 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6139 if (cost_data->loop_info)
6140 rs6000_density_test (cost_data);
6142 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6143 that require versioning for any reason. The vectorization is at
6144 best a wash inside the loop, and the versioning checks make
6145 profitability highly unlikely and potentially quite harmful. */
6146 if (cost_data->loop_info)
6148 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
6149 if (!rs6000_vect_nonmem
6150 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
6151 && LOOP_REQUIRES_VERSIONING (vec_info))
6152 cost_data->cost[vect_body] += 10000;
6155 *prologue_cost = cost_data->cost[vect_prologue];
6156 *body_cost = cost_data->cost[vect_body];
6157 *epilogue_cost = cost_data->cost[vect_epilogue];
6160 /* Implement targetm.vectorize.destroy_cost_data. */
6162 static void
6163 rs6000_destroy_cost_data (void *data)
6165 free (data);
6168 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6169 library with vectorized intrinsics. */
6171 static tree
6172 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
6173 tree type_in)
6175 char name[32];
6176 const char *suffix = NULL;
6177 tree fntype, new_fndecl, bdecl = NULL_TREE;
6178 int n_args = 1;
6179 const char *bname;
6180 machine_mode el_mode, in_mode;
6181 int n, in_n;
6183 /* Libmass is suitable for unsafe math only, as it does not correctly support
6184 parts of IEEE (such as denormals) with the required precision. Only support
6185 it if we have VSX to use the simd d2 or f4 functions.
6186 XXX: Add variable length support. */
6187 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
6188 return NULL_TREE;
6190 el_mode = TYPE_MODE (TREE_TYPE (type_out));
6191 n = TYPE_VECTOR_SUBPARTS (type_out);
6192 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6193 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6194 if (el_mode != in_mode
6195 || n != in_n)
6196 return NULL_TREE;
6198 switch (fn)
6200 CASE_CFN_ATAN2:
6201 CASE_CFN_HYPOT:
6202 CASE_CFN_POW:
6203 n_args = 2;
6204 gcc_fallthrough ();
6206 CASE_CFN_ACOS:
6207 CASE_CFN_ACOSH:
6208 CASE_CFN_ASIN:
6209 CASE_CFN_ASINH:
6210 CASE_CFN_ATAN:
6211 CASE_CFN_ATANH:
6212 CASE_CFN_CBRT:
6213 CASE_CFN_COS:
6214 CASE_CFN_COSH:
6215 CASE_CFN_ERF:
6216 CASE_CFN_ERFC:
6217 CASE_CFN_EXP2:
6218 CASE_CFN_EXP:
6219 CASE_CFN_EXPM1:
6220 CASE_CFN_LGAMMA:
6221 CASE_CFN_LOG10:
6222 CASE_CFN_LOG1P:
6223 CASE_CFN_LOG2:
6224 CASE_CFN_LOG:
6225 CASE_CFN_SIN:
6226 CASE_CFN_SINH:
6227 CASE_CFN_SQRT:
6228 CASE_CFN_TAN:
6229 CASE_CFN_TANH:
6230 if (el_mode == DFmode && n == 2)
6232 bdecl = mathfn_built_in (double_type_node, fn);
6233 suffix = "d2"; /* pow -> powd2 */
6235 else if (el_mode == SFmode && n == 4)
6237 bdecl = mathfn_built_in (float_type_node, fn);
6238 suffix = "4"; /* powf -> powf4 */
6240 else
6241 return NULL_TREE;
6242 if (!bdecl)
6243 return NULL_TREE;
6244 break;
6246 default:
6247 return NULL_TREE;
6250 gcc_assert (suffix != NULL);
6251 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6252 if (!bname)
6253 return NULL_TREE;
6255 strcpy (name, bname + sizeof ("__builtin_") - 1);
6256 strcat (name, suffix);
6258 if (n_args == 1)
6259 fntype = build_function_type_list (type_out, type_in, NULL);
6260 else if (n_args == 2)
6261 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6262 else
6263 gcc_unreachable ();
6265 /* Build a function declaration for the vectorized function. */
6266 new_fndecl = build_decl (BUILTINS_LOCATION,
6267 FUNCTION_DECL, get_identifier (name), fntype);
6268 TREE_PUBLIC (new_fndecl) = 1;
6269 DECL_EXTERNAL (new_fndecl) = 1;
6270 DECL_IS_NOVOPS (new_fndecl) = 1;
6271 TREE_READONLY (new_fndecl) = 1;
6273 return new_fndecl;
6276 /* Returns a function decl for a vectorized version of the builtin function
6277 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6278 if it is not available. */
6280 static tree
6281 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6282 tree type_in)
6284 machine_mode in_mode, out_mode;
6285 int in_n, out_n;
6287 if (TARGET_DEBUG_BUILTIN)
6288 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6289 combined_fn_name (combined_fn (fn)),
6290 GET_MODE_NAME (TYPE_MODE (type_out)),
6291 GET_MODE_NAME (TYPE_MODE (type_in)));
6293 if (TREE_CODE (type_out) != VECTOR_TYPE
6294 || TREE_CODE (type_in) != VECTOR_TYPE
6295 || !TARGET_VECTORIZE_BUILTINS)
6296 return NULL_TREE;
6298 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6299 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6300 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6301 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6303 switch (fn)
6305 CASE_CFN_COPYSIGN:
6306 if (VECTOR_UNIT_VSX_P (V2DFmode)
6307 && out_mode == DFmode && out_n == 2
6308 && in_mode == DFmode && in_n == 2)
6309 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6310 if (VECTOR_UNIT_VSX_P (V4SFmode)
6311 && out_mode == SFmode && out_n == 4
6312 && in_mode == SFmode && in_n == 4)
6313 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6314 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6315 && out_mode == SFmode && out_n == 4
6316 && in_mode == SFmode && in_n == 4)
6317 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6318 break;
6319 CASE_CFN_CEIL:
6320 if (VECTOR_UNIT_VSX_P (V2DFmode)
6321 && out_mode == DFmode && out_n == 2
6322 && in_mode == DFmode && in_n == 2)
6323 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6324 if (VECTOR_UNIT_VSX_P (V4SFmode)
6325 && out_mode == SFmode && out_n == 4
6326 && in_mode == SFmode && in_n == 4)
6327 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6328 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6329 && out_mode == SFmode && out_n == 4
6330 && in_mode == SFmode && in_n == 4)
6331 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6332 break;
6333 CASE_CFN_FLOOR:
6334 if (VECTOR_UNIT_VSX_P (V2DFmode)
6335 && out_mode == DFmode && out_n == 2
6336 && in_mode == DFmode && in_n == 2)
6337 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6338 if (VECTOR_UNIT_VSX_P (V4SFmode)
6339 && out_mode == SFmode && out_n == 4
6340 && in_mode == SFmode && in_n == 4)
6341 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6342 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6343 && out_mode == SFmode && out_n == 4
6344 && in_mode == SFmode && in_n == 4)
6345 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6346 break;
6347 CASE_CFN_FMA:
6348 if (VECTOR_UNIT_VSX_P (V2DFmode)
6349 && out_mode == DFmode && out_n == 2
6350 && in_mode == DFmode && in_n == 2)
6351 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6352 if (VECTOR_UNIT_VSX_P (V4SFmode)
6353 && out_mode == SFmode && out_n == 4
6354 && in_mode == SFmode && in_n == 4)
6355 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6356 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6357 && out_mode == SFmode && out_n == 4
6358 && in_mode == SFmode && in_n == 4)
6359 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6360 break;
6361 CASE_CFN_TRUNC:
6362 if (VECTOR_UNIT_VSX_P (V2DFmode)
6363 && out_mode == DFmode && out_n == 2
6364 && in_mode == DFmode && in_n == 2)
6365 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6366 if (VECTOR_UNIT_VSX_P (V4SFmode)
6367 && out_mode == SFmode && out_n == 4
6368 && in_mode == SFmode && in_n == 4)
6369 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6370 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6371 && out_mode == SFmode && out_n == 4
6372 && in_mode == SFmode && in_n == 4)
6373 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6374 break;
6375 CASE_CFN_NEARBYINT:
6376 if (VECTOR_UNIT_VSX_P (V2DFmode)
6377 && flag_unsafe_math_optimizations
6378 && out_mode == DFmode && out_n == 2
6379 && in_mode == DFmode && in_n == 2)
6380 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6381 if (VECTOR_UNIT_VSX_P (V4SFmode)
6382 && flag_unsafe_math_optimizations
6383 && out_mode == SFmode && out_n == 4
6384 && in_mode == SFmode && in_n == 4)
6385 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6386 break;
6387 CASE_CFN_RINT:
6388 if (VECTOR_UNIT_VSX_P (V2DFmode)
6389 && !flag_trapping_math
6390 && out_mode == DFmode && out_n == 2
6391 && in_mode == DFmode && in_n == 2)
6392 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6393 if (VECTOR_UNIT_VSX_P (V4SFmode)
6394 && !flag_trapping_math
6395 && out_mode == SFmode && out_n == 4
6396 && in_mode == SFmode && in_n == 4)
6397 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6398 break;
6399 default:
6400 break;
6403 /* Generate calls to libmass if appropriate. */
6404 if (rs6000_veclib_handler)
6405 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6407 return NULL_TREE;
6410 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6412 static tree
6413 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6414 tree type_in)
6416 machine_mode in_mode, out_mode;
6417 int in_n, out_n;
6419 if (TARGET_DEBUG_BUILTIN)
6420 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6421 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6422 GET_MODE_NAME (TYPE_MODE (type_out)),
6423 GET_MODE_NAME (TYPE_MODE (type_in)));
6425 if (TREE_CODE (type_out) != VECTOR_TYPE
6426 || TREE_CODE (type_in) != VECTOR_TYPE
6427 || !TARGET_VECTORIZE_BUILTINS)
6428 return NULL_TREE;
6430 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6431 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6432 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6433 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6435 enum rs6000_builtins fn
6436 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6437 switch (fn)
6439 case RS6000_BUILTIN_RSQRTF:
6440 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6441 && out_mode == SFmode && out_n == 4
6442 && in_mode == SFmode && in_n == 4)
6443 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6444 break;
6445 case RS6000_BUILTIN_RSQRT:
6446 if (VECTOR_UNIT_VSX_P (V2DFmode)
6447 && out_mode == DFmode && out_n == 2
6448 && in_mode == DFmode && in_n == 2)
6449 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6450 break;
6451 case RS6000_BUILTIN_RECIPF:
6452 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6453 && out_mode == SFmode && out_n == 4
6454 && in_mode == SFmode && in_n == 4)
6455 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6456 break;
6457 case RS6000_BUILTIN_RECIP:
6458 if (VECTOR_UNIT_VSX_P (V2DFmode)
6459 && out_mode == DFmode && out_n == 2
6460 && in_mode == DFmode && in_n == 2)
6461 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6462 break;
6463 default:
6464 break;
6466 return NULL_TREE;
6469 /* Default CPU string for rs6000*_file_start functions. */
6470 static const char *rs6000_default_cpu;
6472 /* Do anything needed at the start of the asm file. */
6474 static void
6475 rs6000_file_start (void)
6477 char buffer[80];
6478 const char *start = buffer;
6479 FILE *file = asm_out_file;
6481 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6483 default_file_start ();
6485 if (flag_verbose_asm)
6487 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6489 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6491 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6492 start = "";
6495 if (global_options_set.x_rs6000_cpu_index)
6497 fprintf (file, "%s -mcpu=%s", start,
6498 processor_target_table[rs6000_cpu_index].name);
6499 start = "";
6502 if (global_options_set.x_rs6000_tune_index)
6504 fprintf (file, "%s -mtune=%s", start,
6505 processor_target_table[rs6000_tune_index].name);
6506 start = "";
6509 if (PPC405_ERRATUM77)
6511 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6512 start = "";
6515 #ifdef USING_ELFOS_H
6516 switch (rs6000_sdata)
6518 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6519 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6520 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6521 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6524 if (rs6000_sdata && g_switch_value)
6526 fprintf (file, "%s -G %d", start,
6527 g_switch_value);
6528 start = "";
6530 #endif
6532 if (*start == '\0')
6533 putc ('\n', file);
6536 #ifdef USING_ELFOS_H
6537 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6538 && !global_options_set.x_rs6000_cpu_index)
6540 fputs ("\t.machine ", asm_out_file);
6541 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6542 fputs ("power9\n", asm_out_file);
6543 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6544 fputs ("power8\n", asm_out_file);
6545 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6546 fputs ("power7\n", asm_out_file);
6547 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6548 fputs ("power6\n", asm_out_file);
6549 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6550 fputs ("power5\n", asm_out_file);
6551 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6552 fputs ("power4\n", asm_out_file);
6553 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6554 fputs ("ppc64\n", asm_out_file);
6555 else
6556 fputs ("ppc\n", asm_out_file);
6558 #endif
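/* For example, a compiler configured without a default cpu and invoked
without -mcpu=, but with ISA flags that include OPTION_MASK_DIRECT_MOVE,
emits ".machine power8" here so the assembler accepts the newer
mnemonics. */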
6560 if (DEFAULT_ABI == ABI_ELFv2)
6561 fprintf (file, "\t.abiversion 2\n");
6565 /* Return nonzero if this function is known to have a null epilogue. */
6567 int
6568 direct_return (void)
6570 if (reload_completed)
6572 rs6000_stack_t *info = rs6000_stack_info ();
6574 if (info->first_gp_reg_save == 32
6575 && info->first_fp_reg_save == 64
6576 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6577 && ! info->lr_save_p
6578 && ! info->cr_save_p
6579 && info->vrsave_size == 0
6580 && ! info->push_p)
6581 return 1;
6584 return 0;
6587 /* Return the number of instructions it takes to form a constant in an
6588 integer register. */
6590 int
6591 num_insns_constant_wide (HOST_WIDE_INT value)
6593 /* signed constant loadable with addi */
6594 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6595 return 1;
6597 /* constant loadable with addis */
6598 else if ((value & 0xffff) == 0
6599 && (value >> 31 == -1 || value >> 31 == 0))
6600 return 1;
6602 else if (TARGET_POWERPC64)
6604 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6605 HOST_WIDE_INT high = value >> 31;
6607 if (high == 0 || high == -1)
6608 return 2;
6610 high >>= 1;
6612 if (low == 0)
6613 return num_insns_constant_wide (high) + 1;
6614 else if (high == 0)
6615 return num_insns_constant_wide (low) + 1;
6616 else
6617 return (num_insns_constant_wide (high)
6618 + num_insns_constant_wide (low) + 1);
6621 else
6622 return 2;
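/* Examples of the above: 0x7fff is one addi (li); 0x12340000 is one addis
(lis); 0x12345678 needs lis+ori, i.e. 2; and a worst-case 64-bit constant
costs at most 2 + 2 + 1 = 5 insns (high half, low half, and the insn that
combines them). */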
6625 int
6626 num_insns_constant (rtx op, machine_mode mode)
6628 HOST_WIDE_INT low, high;
6630 switch (GET_CODE (op))
6632 case CONST_INT:
6633 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6634 && rs6000_is_valid_and_mask (op, mode))
6635 return 2;
6636 else
6637 return num_insns_constant_wide (INTVAL (op));
6639 case CONST_WIDE_INT:
6641 int i;
6642 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6643 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6644 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6645 return ins;
6648 case CONST_DOUBLE:
6649 if (mode == SFmode || mode == SDmode)
6651 long l;
6653 if (DECIMAL_FLOAT_MODE_P (mode))
6654 REAL_VALUE_TO_TARGET_DECIMAL32
6655 (*CONST_DOUBLE_REAL_VALUE (op), l);
6656 else
6657 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6658 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6661 long l[2];
6662 if (DECIMAL_FLOAT_MODE_P (mode))
6663 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6664 else
6665 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6666 high = l[WORDS_BIG_ENDIAN == 0];
6667 low = l[WORDS_BIG_ENDIAN != 0];
6669 if (TARGET_32BIT)
6670 return (num_insns_constant_wide (low)
6671 + num_insns_constant_wide (high));
6672 else
6674 if ((high == 0 && low >= 0)
6675 || (high == -1 && low < 0))
6676 return num_insns_constant_wide (low);
6678 else if (rs6000_is_valid_and_mask (op, mode))
6679 return 2;
6681 else if (low == 0)
6682 return num_insns_constant_wide (high) + 1;
6684 else
6685 return (num_insns_constant_wide (high)
6686 + num_insns_constant_wide (low) + 1);
6689 default:
6690 gcc_unreachable ();
6694 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6695 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6696 corresponding element of the vector, but for V4SFmode and V2SFmode,
6697 the corresponding "float" is interpreted as an SImode integer. */
6699 HOST_WIDE_INT
6700 const_vector_elt_as_int (rtx op, unsigned int elt)
6702 rtx tmp;
6704 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6705 gcc_assert (GET_MODE (op) != V2DImode
6706 && GET_MODE (op) != V2DFmode);
6708 tmp = CONST_VECTOR_ELT (op, elt);
6709 if (GET_MODE (op) == V4SFmode
6710 || GET_MODE (op) == V2SFmode)
6711 tmp = gen_lowpart (SImode, tmp);
6712 return INTVAL (tmp);
6715 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6716 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6717 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6718 all items are set to the same value and contain COPIES replicas of the
6719 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6720 operand and the others are set to the value of the operand's msb. */
6722 static bool
6723 vspltis_constant (rtx op, unsigned step, unsigned copies)
6725 machine_mode mode = GET_MODE (op);
6726 machine_mode inner = GET_MODE_INNER (mode);
6728 unsigned i;
6729 unsigned nunits;
6730 unsigned bitsize;
6731 unsigned mask;
6733 HOST_WIDE_INT val;
6734 HOST_WIDE_INT splat_val;
6735 HOST_WIDE_INT msb_val;
6737 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6738 return false;
6740 nunits = GET_MODE_NUNITS (mode);
6741 bitsize = GET_MODE_BITSIZE (inner);
6742 mask = GET_MODE_MASK (inner);
6744 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6745 splat_val = val;
6746 msb_val = val >= 0 ? 0 : -1;
6748 /* Construct the value to be splatted, if possible. If not, return false. */
6749 for (i = 2; i <= copies; i *= 2)
6751 HOST_WIDE_INT small_val;
6752 bitsize /= 2;
6753 small_val = splat_val >> bitsize;
6754 mask >>= bitsize;
6755 if (splat_val != ((HOST_WIDE_INT)
6756 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6757 | (small_val & mask)))
6758 return false;
6759 splat_val = small_val;
6762 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6763 if (EASY_VECTOR_15 (splat_val))
6766 /* Also check if we can splat, and then add the result to itself. Do so if
6767 the value is positive, or if the splat instruction is using OP's mode;
6768 for splat_val < 0, the splat and the add should use the same mode. */
6769 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6770 && (splat_val >= 0 || (step == 1 && copies == 1)))
6773 /* Also check if we are loading up the most significant bit, which can be
6774 done by loading up -1 and shifting the value left by -1. */
6775 else if (EASY_VECTOR_MSB (splat_val, inner))
6778 else
6779 return false;
6781 /* Check if VAL is present in every STEP-th element, and the
6782 other elements are filled with its most significant bit. */
6783 for (i = 1; i < nunits; ++i)
6785 HOST_WIDE_INT desired_val;
6786 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6787 if ((i & (step - 1)) == 0)
6788 desired_val = val;
6789 else
6790 desired_val = msb_val;
6792 if (desired_val != const_vector_elt_as_int (op, elt))
6793 return false;
6796 return true;
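/* For example, the V8HImode constant {5,5,5,5,5,5,5,5} is accepted with
STEP == 1 and COPIES == 1 (a single vspltish 5), while an all-16s vector
is not EASY_VECTOR_15 but takes the splat-and-add path above (e.g. splat
8, then add the result to itself). */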
6799 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6800 instruction, filling in the bottom elements with 0 or -1.
6802 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6803 for the number of zeroes to shift in, or negative for the number of 0xff
6804 bytes to shift in.
6806 OP is a CONST_VECTOR. */
6808 int
6809 vspltis_shifted (rtx op)
6811 machine_mode mode = GET_MODE (op);
6812 machine_mode inner = GET_MODE_INNER (mode);
6814 unsigned i, j;
6815 unsigned nunits;
6816 unsigned mask;
6818 HOST_WIDE_INT val;
6820 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6821 return false;
6823 /* We need to create pseudo registers to do the shift, so don't recognize
6824 shift vector constants after reload. */
6825 if (!can_create_pseudo_p ())
6826 return false;
6828 nunits = GET_MODE_NUNITS (mode);
6829 mask = GET_MODE_MASK (inner);
6831 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6833 /* Check if the value can really be the operand of a vspltis[bhw]. */
6834 if (EASY_VECTOR_15 (val))
6837 /* Also check if we are loading up the most significant bit which can be done
6838 by loading up -1 and shifting the value left by -1. */
6839 else if (EASY_VECTOR_MSB (val, inner))
6842 else
6843 return 0;
6845 /* Check if VAL is present in every STEP-th element until we find elements
6846 that are 0 or all 1 bits. */
6847 for (i = 1; i < nunits; ++i)
6849 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6850 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6852 /* If the value isn't the splat value, check for the remaining elements
6853 being 0/-1. */
6854 if (val != elt_val)
6856 if (elt_val == 0)
6858 for (j = i+1; j < nunits; ++j)
6860 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6861 if (const_vector_elt_as_int (op, elt2) != 0)
6862 return 0;
6865 return (nunits - i) * GET_MODE_SIZE (inner);
6868 else if ((elt_val & mask) == mask)
6870 for (j = i+1; j < nunits; ++j)
6872 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6873 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6874 return 0;
6877 return -((nunits - i) * GET_MODE_SIZE (inner));
6880 else
6881 return 0;
6885 /* If all elements are equal, we don't need to do VSLDOI. */
6886 return 0;
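/* Example: on a big-endian target the V4SImode constant {3, 3, 3, 0}
splats 3 and returns (4 - 3) * 4 == 4, i.e. shift in four zero bytes;
{3, -1, -1, -1} returns -12, shifting in 0xff bytes instead. */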
6890 /* Return true if OP is of the given MODE and can be synthesized
6891 with a vspltisb, vspltish or vspltisw. */
6893 bool
6894 easy_altivec_constant (rtx op, machine_mode mode)
6896 unsigned step, copies;
6898 if (mode == VOIDmode)
6899 mode = GET_MODE (op);
6900 else if (mode != GET_MODE (op))
6901 return false;
6903 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6904 constants. */
6905 if (mode == V2DFmode)
6906 return zero_constant (op, mode);
6908 else if (mode == V2DImode)
6910 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6911 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6912 return false;
6914 if (zero_constant (op, mode))
6915 return true;
6917 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6918 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6919 return true;
6921 return false;
6924 /* V1TImode is a special container for TImode. Ignore for now. */
6925 else if (mode == V1TImode)
6926 return false;
6928 /* Start with a vspltisw. */
6929 step = GET_MODE_NUNITS (mode) / 4;
6930 copies = 1;
6932 if (vspltis_constant (op, step, copies))
6933 return true;
6935 /* Then try with a vspltish. */
6936 if (step == 1)
6937 copies <<= 1;
6938 else
6939 step >>= 1;
6941 if (vspltis_constant (op, step, copies))
6942 return true;
6944 /* And finally a vspltisb. */
6945 if (step == 1)
6946 copies <<= 1;
6947 else
6948 step >>= 1;
6950 if (vspltis_constant (op, step, copies))
6951 return true;
6953 if (vspltis_shifted (op) != 0)
6954 return true;
6956 return false;
6959 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6960 result is OP. Abort if it is not possible. */
6962 rtx
6963 gen_easy_altivec_constant (rtx op)
6965 machine_mode mode = GET_MODE (op);
6966 int nunits = GET_MODE_NUNITS (mode);
6967 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6968 unsigned step = nunits / 4;
6969 unsigned copies = 1;
6971 /* Start with a vspltisw. */
6972 if (vspltis_constant (op, step, copies))
6973 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6975 /* Then try with a vspltish. */
6976 if (step == 1)
6977 copies <<= 1;
6978 else
6979 step >>= 1;
6981 if (vspltis_constant (op, step, copies))
6982 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6984 /* And finally a vspltisb. */
6985 if (step == 1)
6986 copies <<= 1;
6987 else
6988 step >>= 1;
6990 if (vspltis_constant (op, step, copies))
6991 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6993 gcc_unreachable ();
6996 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6997 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6999 Store the number of instructions needed (1 or 2) in the location pointed
7000 to by NUM_INSNS_PTR.
7002 Store the constant that is being split via CONSTANT_PTR. */
7004 bool
7005 xxspltib_constant_p (rtx op,
7006 machine_mode mode,
7007 int *num_insns_ptr,
7008 int *constant_ptr)
7010 size_t nunits = GET_MODE_NUNITS (mode);
7011 size_t i;
7012 HOST_WIDE_INT value;
7013 rtx element;
7015 /* Set the returned values to out of bound values. */
7016 *num_insns_ptr = -1;
7017 *constant_ptr = 256;
7019 if (!TARGET_P9_VECTOR)
7020 return false;
7022 if (mode == VOIDmode)
7023 mode = GET_MODE (op);
7025 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
7026 return false;
7028 /* Handle (vec_duplicate <constant>). */
7029 if (GET_CODE (op) == VEC_DUPLICATE)
7031 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7032 && mode != V2DImode)
7033 return false;
7035 element = XEXP (op, 0);
7036 if (!CONST_INT_P (element))
7037 return false;
7039 value = INTVAL (element);
7040 if (!IN_RANGE (value, -128, 127))
7041 return false;
7044 /* Handle (const_vector [...]). */
7045 else if (GET_CODE (op) == CONST_VECTOR)
7047 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7048 && mode != V2DImode)
7049 return false;
7051 element = CONST_VECTOR_ELT (op, 0);
7052 if (!CONST_INT_P (element))
7053 return false;
7055 value = INTVAL (element);
7056 if (!IN_RANGE (value, -128, 127))
7057 return false;
7059 for (i = 1; i < nunits; i++)
7061 element = CONST_VECTOR_ELT (op, i);
7062 if (!CONST_INT_P (element))
7063 return false;
7065 if (value != INTVAL (element))
7066 return false;
7070 /* Handle integer constants being loaded into the upper part of the VSX
7071 register as a scalar. If the value isn't 0/-1, only allow it if the mode
7072 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
7073 else if (CONST_INT_P (op))
7075 if (!SCALAR_INT_MODE_P (mode))
7076 return false;
7078 value = INTVAL (op);
7079 if (!IN_RANGE (value, -128, 127))
7080 return false;
7082 if (!IN_RANGE (value, -1, 0))
7084 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
7085 return false;
7087 if (EASY_VECTOR_15 (value))
7088 return false;
7092 else
7093 return false;
7095 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
7096 sign extend. Special case 0/-1 to allow getting any VSX register instead
7097 of an Altivec register. */
7098 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
7099 && EASY_VECTOR_15 (value))
7100 return false;
7102 /* Return # of instructions and the constant byte for XXSPLTIB. */
7103 if (mode == V16QImode)
7104 *num_insns_ptr = 1;
7106 else if (IN_RANGE (value, -1, 0))
7107 *num_insns_ptr = 1;
7109 else
7110 *num_insns_ptr = 2;
7112 *constant_ptr = (int) value;
7113 return true;
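/* For instance, a V4SImode splat of 100 is rejected for vspltisw
   (100 is outside the -16..15 EASY_VECTOR_15 range) but is accepted
   here with *NUM_INSNS_PTR = 2: an XXSPLTIB putting 100 in every byte
   followed by a VEXTSB2W sign extension.  A V16QImode splat needs
   only the XXSPLTIB itself.  */

/* Output the assembler code for moving the easy vector constant
   OPERANDS[1] into OPERANDS[0], returning "#" when the move must be
   split after reload.  */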
7116 const char *
7117 output_vec_const_move (rtx *operands)
7119 int cst, cst2, shift;
7120 machine_mode mode;
7121 rtx dest, vec;
7123 dest = operands[0];
7124 vec = operands[1];
7125 mode = GET_MODE (dest);
7127 if (TARGET_VSX)
7129 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
7130 int xxspltib_value = 256;
7131 int num_insns = -1;
7133 if (zero_constant (vec, mode))
7135 if (TARGET_P9_VECTOR)
7136 return "xxspltib %x0,0";
7138 else if (dest_vmx_p)
7139 return "vspltisw %0,0";
7141 else
7142 return "xxlxor %x0,%x0,%x0";
7145 if (all_ones_constant (vec, mode))
7147 if (TARGET_P9_VECTOR)
7148 return "xxspltib %x0,255";
7150 else if (dest_vmx_p)
7151 return "vspltisw %0,-1";
7153 else if (TARGET_P8_VECTOR)
7154 return "xxlorc %x0,%x0,%x0";
7156 else
7157 gcc_unreachable ();
7160 if (TARGET_P9_VECTOR
7161 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
7163 if (num_insns == 1)
7165 operands[2] = GEN_INT (xxspltib_value & 0xff);
7166 return "xxspltib %x0,%2";
7169 return "#";
7173 if (TARGET_ALTIVEC)
7175 rtx splat_vec;
7177 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
7178 if (zero_constant (vec, mode))
7179 return "vspltisw %0,0";
7181 if (all_ones_constant (vec, mode))
7182 return "vspltisw %0,-1";
7184 /* Do we need to construct a value using VSLDOI? */
7185 shift = vspltis_shifted (vec);
7186 if (shift != 0)
7187 return "#";
7189 splat_vec = gen_easy_altivec_constant (vec);
7190 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
7191 operands[1] = XEXP (splat_vec, 0);
7192 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
7193 return "#";
7195 switch (GET_MODE (splat_vec))
7197 case E_V4SImode:
7198 return "vspltisw %0,%1";
7200 case E_V8HImode:
7201 return "vspltish %0,%1";
7203 case E_V16QImode:
7204 return "vspltisb %0,%1";
7206 default:
7207 gcc_unreachable ();
7211 gcc_assert (TARGET_SPE);
7213 /* Vector constant 0 is handled by a splitter for V2SI, and in the
7214 patterns for V1DI, V4HI, and V2SF.
7216 FIXME: We should probably return # and add post-reload
7217 splitters for these, but this way is so easy ;-). */
7218 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
7219 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
7220 operands[1] = CONST_VECTOR_ELT (vec, 0);
7221 operands[2] = CONST_VECTOR_ELT (vec, 1);
7222 if (cst == cst2)
7223 return "li %0,%1\n\tevmergelo %0,%0,%0";
7224 else if (WORDS_BIG_ENDIAN)
7225 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7226 else
7227 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7230 /* Initialize the paired-float vector TARGET to VALS. */
7232 void
7233 paired_expand_vector_init (rtx target, rtx vals)
7235 machine_mode mode = GET_MODE (target);
7236 int n_elts = GET_MODE_NUNITS (mode);
7237 int n_var = 0;
7238 rtx x, new_rtx, tmp, constant_op, op1, op2;
7239 int i;
7241 for (i = 0; i < n_elts; ++i)
7243 x = XVECEXP (vals, 0, i);
7244 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7245 ++n_var;
7247 if (n_var == 0)
7249 /* Load from constant pool. */
7250 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7251 return;
7254 if (n_var == 2)
7256 /* The vector is initialized only with non-constants. */
7257 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7258 XVECEXP (vals, 0, 1));
7260 emit_move_insn (target, new_rtx);
7261 return;
7264 /* One field is non-constant and the other one is a constant. Load the
7265 constant from the constant pool and use the ps_merge instruction to
7266 construct the whole vector. */
7267 op1 = XVECEXP (vals, 0, 0);
7268 op2 = XVECEXP (vals, 0, 1);
7270 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7272 tmp = gen_reg_rtx (GET_MODE (constant_op));
7273 emit_move_insn (tmp, constant_op);
7275 if (CONSTANT_P (op1))
7276 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7277 else
7278 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7280 emit_move_insn (target, new_rtx);
7283 void
7284 paired_expand_vector_move (rtx operands[])
7286 rtx op0 = operands[0], op1 = operands[1];
7288 emit_move_insn (op0, op1);
7291 /* Emit vector compare for code RCODE. DEST is the destination, OP0 and
7292 OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
7293 two operands of the relation RCODE. This is a recursive
7294 function. */
7296 static void
7297 paired_emit_vector_compare (enum rtx_code rcode,
7298 rtx dest, rtx op0, rtx op1,
7299 rtx cc_op0, rtx cc_op1)
7301 rtx tmp = gen_reg_rtx (V2SFmode);
7302 rtx tmp1, max, min;
7304 gcc_assert (TARGET_PAIRED_FLOAT);
7305 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7307 switch (rcode)
7309 case LT:
7310 case LTU:
7311 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7312 return;
7313 case GE:
7314 case GEU:
7315 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7316 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7317 return;
7318 case LE:
7319 case LEU:
7320 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7321 return;
7322 case GT:
7323 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7324 return;
7325 case EQ:
7326 tmp1 = gen_reg_rtx (V2SFmode);
7327 max = gen_reg_rtx (V2SFmode);
7328 min = gen_reg_rtx (V2SFmode);
7331 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7332 emit_insn (gen_selv2sf4
7333 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7334 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7335 emit_insn (gen_selv2sf4
7336 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7337 emit_insn (gen_subv2sf3 (tmp1, min, max));
7338 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7339 return;
7340 case NE:
7341 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7342 return;
7343 case UNLE:
7344 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7345 return;
7346 case UNLT:
7347 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7348 return;
7349 case UNGE:
7350 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7351 return;
7352 case UNGT:
7353 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7354 return;
7355 default:
7356 gcc_unreachable ();
7359 return;
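/* Each recursive case above reduces to the GE or EQ base cases in at
   most two steps; e.g. GT rewrites to LE with the select operands
   swapped, which in turn rewrites to GE with the comparison operands
   swapped.  */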
7362 /* Emit vector conditional expression.
7363 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7364 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7366 int
7367 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7368 rtx cond, rtx cc_op0, rtx cc_op1)
7370 enum rtx_code rcode = GET_CODE (cond);
7372 if (!TARGET_PAIRED_FLOAT)
7373 return 0;
7375 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7377 return 1;
7380 /* Initialize vector TARGET to VALS. */
7382 void
7383 rs6000_expand_vector_init (rtx target, rtx vals)
7385 machine_mode mode = GET_MODE (target);
7386 machine_mode inner_mode = GET_MODE_INNER (mode);
7387 int n_elts = GET_MODE_NUNITS (mode);
7388 int n_var = 0, one_var = -1;
7389 bool all_same = true, all_const_zero = true;
7390 rtx x, mem;
7391 int i;
7393 for (i = 0; i < n_elts; ++i)
7395 x = XVECEXP (vals, 0, i);
7396 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7397 ++n_var, one_var = i;
7398 else if (x != CONST0_RTX (inner_mode))
7399 all_const_zero = false;
7401 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7402 all_same = false;
7405 if (n_var == 0)
7407 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7408 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7409 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7411 /* Zero register. */
7412 emit_move_insn (target, CONST0_RTX (mode));
7413 return;
7415 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7417 /* Splat immediate. */
7418 emit_insn (gen_rtx_SET (target, const_vec));
7419 return;
7421 else
7423 /* Load from constant pool. */
7424 emit_move_insn (target, const_vec);
7425 return;
7429 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7430 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7432 rtx op[2];
7433 size_t i;
7434 size_t num_elements = all_same ? 1 : 2;
7435 for (i = 0; i < num_elements; i++)
7437 op[i] = XVECEXP (vals, 0, i);
7438 /* Just in case there is a SUBREG with a smaller mode, do a
7439 conversion. */
7440 if (GET_MODE (op[i]) != inner_mode)
7442 rtx tmp = gen_reg_rtx (inner_mode);
7443 convert_move (tmp, op[i], 0);
7444 op[i] = tmp;
7446 /* Allow load with splat double word. */
7447 else if (MEM_P (op[i]))
7449 if (!all_same)
7450 op[i] = force_reg (inner_mode, op[i]);
7452 else if (!REG_P (op[i]))
7453 op[i] = force_reg (inner_mode, op[i]);
7456 if (all_same)
7458 if (mode == V2DFmode)
7459 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7460 else
7461 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7463 else
7465 if (mode == V2DFmode)
7466 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7467 else
7468 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7470 return;
7473 /* Special case initializing vector int if we are on 64-bit systems with
7474 direct move or we have the ISA 3.0 instructions. */
7475 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7476 && TARGET_DIRECT_MOVE_64BIT)
7478 if (all_same)
7480 rtx element0 = XVECEXP (vals, 0, 0);
7481 if (MEM_P (element0))
7482 element0 = rs6000_address_for_fpconvert (element0);
7483 else
7484 element0 = force_reg (SImode, element0);
7486 if (TARGET_P9_VECTOR)
7487 emit_insn (gen_vsx_splat_v4si (target, element0));
7488 else
7490 rtx tmp = gen_reg_rtx (DImode);
7491 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7492 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7494 return;
7496 else
7498 rtx elements[4];
7499 size_t i;
7501 for (i = 0; i < 4; i++)
7503 elements[i] = XVECEXP (vals, 0, i);
7504 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7505 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7508 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7509 elements[2], elements[3]));
7510 return;
7514 /* With single-precision floating point on VSX, we know that internally
7515 single precision is actually represented as a double, so either make
7516 two V2DF vectors and convert them to single precision, or do one
7517 conversion and splat the result to the other elements. */
7518 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7520 if (all_same)
7522 rtx element0 = XVECEXP (vals, 0, 0);
7524 if (TARGET_P9_VECTOR)
7526 if (MEM_P (element0))
7527 element0 = rs6000_address_for_fpconvert (element0);
7529 emit_insn (gen_vsx_splat_v4sf (target, element0));
7532 else
7534 rtx freg = gen_reg_rtx (V4SFmode);
7535 rtx sreg = force_reg (SFmode, element0);
7536 rtx cvt = (TARGET_XSCVDPSPN
7537 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7538 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7540 emit_insn (cvt);
7541 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7542 const0_rtx));
7545 else
7547 rtx dbl_even = gen_reg_rtx (V2DFmode);
7548 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7549 rtx flt_even = gen_reg_rtx (V4SFmode);
7550 rtx flt_odd = gen_reg_rtx (V4SFmode);
7551 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7552 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7553 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7554 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7556 /* Use VMRGEW if we can instead of doing a permute. */
7557 if (TARGET_P8_VECTOR)
7559 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7560 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7561 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7562 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7563 if (BYTES_BIG_ENDIAN)
7564 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7565 else
7566 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7568 else
7570 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7571 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7572 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7573 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7574 rs6000_expand_extract_even (target, flt_even, flt_odd);
7577 return;
7580 /* Special case initializing vector short/char that are splats if we are on
7581 64-bit systems with direct move. */
7582 if (all_same && TARGET_DIRECT_MOVE_64BIT
7583 && (mode == V16QImode || mode == V8HImode))
7585 rtx op0 = XVECEXP (vals, 0, 0);
7586 rtx di_tmp = gen_reg_rtx (DImode);
7588 if (!REG_P (op0))
7589 op0 = force_reg (GET_MODE_INNER (mode), op0);
7591 if (mode == V16QImode)
7593 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7594 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7595 return;
7598 if (mode == V8HImode)
7600 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7601 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7602 return;
7606 /* Store value to stack temp. Load vector element. Splat. However, splat
7607 of 64-bit items is not supported on Altivec. */
7608 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7610 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7611 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7612 XVECEXP (vals, 0, 0));
7613 x = gen_rtx_UNSPEC (VOIDmode,
7614 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7615 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7616 gen_rtvec (2,
7617 gen_rtx_SET (target, mem),
7618 x)));
7619 x = gen_rtx_VEC_SELECT (inner_mode, target,
7620 gen_rtx_PARALLEL (VOIDmode,
7621 gen_rtvec (1, const0_rtx)));
7622 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7623 return;
7626 /* One field is non-constant. Load constant then overwrite
7627 varying field. */
7628 if (n_var == 1)
7630 rtx copy = copy_rtx (vals);
7632 /* Load constant part of vector, substitute neighboring value for
7633 varying element. */
7634 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7635 rs6000_expand_vector_init (target, copy);
7637 /* Insert variable. */
7638 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7639 return;
7642 /* Construct the vector in memory one field at a time
7643 and load the whole vector. */
7644 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7645 for (i = 0; i < n_elts; i++)
7646 emit_move_insn (adjust_address_nv (mem, inner_mode,
7647 i * GET_MODE_SIZE (inner_mode)),
7648 XVECEXP (vals, 0, i));
7649 emit_move_insn (target, mem);
7652 /* Set field ELT of TARGET to VAL. */
7654 void
7655 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7657 machine_mode mode = GET_MODE (target);
7658 machine_mode inner_mode = GET_MODE_INNER (mode);
7659 rtx reg = gen_reg_rtx (mode);
7660 rtx mask, mem, x;
7661 int width = GET_MODE_SIZE (inner_mode);
7662 int i;
7664 val = force_reg (GET_MODE (val), val);
7666 if (VECTOR_MEM_VSX_P (mode))
7668 rtx insn = NULL_RTX;
7669 rtx elt_rtx = GEN_INT (elt);
7671 if (mode == V2DFmode)
7672 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7674 else if (mode == V2DImode)
7675 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7677 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7678 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7680 if (mode == V4SImode)
7681 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7682 else if (mode == V8HImode)
7683 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7684 else if (mode == V16QImode)
7685 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7688 if (insn)
7690 emit_insn (insn);
7691 return;
7695 /* Simplify setting single element vectors like V1TImode. */
7696 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7698 emit_move_insn (target, gen_lowpart (mode, val));
7699 return;
7702 /* Load single variable value. */
7703 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7704 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7705 x = gen_rtx_UNSPEC (VOIDmode,
7706 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7707 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7708 gen_rtvec (2,
7709 gen_rtx_SET (reg, mem),
7710 x)));
7712 /* Linear sequence. */
7713 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7714 for (i = 0; i < 16; ++i)
7715 XVECEXP (mask, 0, i) = GEN_INT (i);
7717 /* Set permute mask to insert element into target. */
7718 for (i = 0; i < width; ++i)
7719 XVECEXP (mask, 0, elt*width + i)
7720 = GEN_INT (i + 0x10);
7721 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7723 if (BYTES_BIG_ENDIAN)
7724 x = gen_rtx_UNSPEC (mode,
7725 gen_rtvec (3, target, reg,
7726 force_reg (V16QImode, x)),
7727 UNSPEC_VPERM);
7728 else
7730 if (TARGET_P9_VECTOR)
7731 x = gen_rtx_UNSPEC (mode,
7732 gen_rtvec (3, target, reg,
7733 force_reg (V16QImode, x)),
7734 UNSPEC_VPERMR);
7735 else
7737 /* Invert selector. We prefer to generate VNAND on P8 so
7738 that future fusion opportunities can kick in, but must
7739 generate VNOR elsewhere. */
7740 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7741 rtx iorx = (TARGET_P8_VECTOR
7742 ? gen_rtx_IOR (V16QImode, notx, notx)
7743 : gen_rtx_AND (V16QImode, notx, notx));
7744 rtx tmp = gen_reg_rtx (V16QImode);
7745 emit_insn (gen_rtx_SET (tmp, iorx));
7747 /* Permute with operands reversed and adjusted selector. */
7748 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7749 UNSPEC_VPERM);
7753 emit_insn (gen_rtx_SET (target, x));
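/* For example (big endian), inserting element 1 of a V4SImode vector
   leaves selector bytes { 0..3, 16..19, 8..15 }: values 16-19 pick
   the first four bytes of the loaded value (the second vperm input),
   while the remaining bytes pass the old target through.  */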
7756 /* Extract field ELT from VEC into TARGET. */
7758 void
7759 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7761 machine_mode mode = GET_MODE (vec);
7762 machine_mode inner_mode = GET_MODE_INNER (mode);
7763 rtx mem;
7765 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7767 switch (mode)
7769 default:
7770 break;
7771 case E_V1TImode:
7772 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7773 emit_move_insn (target, gen_lowpart (TImode, vec));
7774 break;
7775 case E_V2DFmode:
7776 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7777 return;
7778 case E_V2DImode:
7779 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7780 return;
7781 case E_V4SFmode:
7782 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7783 return;
7784 case E_V16QImode:
7785 if (TARGET_DIRECT_MOVE_64BIT)
7787 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7788 return;
7790 else
7791 break;
7792 case E_V8HImode:
7793 if (TARGET_DIRECT_MOVE_64BIT)
7795 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7796 return;
7798 else
7799 break;
7800 case E_V4SImode:
7801 if (TARGET_DIRECT_MOVE_64BIT)
7803 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7804 return;
7806 break;
7809 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7810 && TARGET_DIRECT_MOVE_64BIT)
7812 if (GET_MODE (elt) != DImode)
7814 rtx tmp = gen_reg_rtx (DImode);
7815 convert_move (tmp, elt, 0);
7816 elt = tmp;
7818 else if (!REG_P (elt))
7819 elt = force_reg (DImode, elt);
7821 switch (mode)
7823 case E_V2DFmode:
7824 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7825 return;
7827 case E_V2DImode:
7828 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7829 return;
7831 case E_V4SFmode:
7832 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7833 return;
7835 case E_V4SImode:
7836 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7837 return;
7839 case E_V8HImode:
7840 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7841 return;
7843 case E_V16QImode:
7844 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7845 return;
7847 default:
7848 gcc_unreachable ();
7852 gcc_assert (CONST_INT_P (elt));
7854 /* Allocate mode-sized buffer. */
7855 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7857 emit_move_insn (mem, vec);
7859 /* Add offset to field within buffer matching vector element. */
7860 mem = adjust_address_nv (mem, inner_mode,
7861 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7863 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7866 /* Helper function to return the register number of a RTX. */
7867 static inline int
7868 regno_or_subregno (rtx op)
7870 if (REG_P (op))
7871 return REGNO (op);
7872 else if (SUBREG_P (op))
7873 return subreg_regno (op);
7874 else
7875 gcc_unreachable ();
7878 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7879 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7880 temporary (BASE_TMP) to fix up the address. Return the new memory address
7881 that is valid for reads or writes to a given register (SCALAR_REG). */
7883 static rtx
7884 rs6000_adjust_vec_address (rtx scalar_reg,
7885 rtx mem,
7886 rtx element,
7887 rtx base_tmp,
7888 machine_mode scalar_mode)
7890 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7891 rtx addr = XEXP (mem, 0);
7892 rtx element_offset;
7893 rtx new_addr;
7894 bool valid_addr_p;
7896 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7897 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7899 /* Calculate what we need to add to the address to get the element
7900 address. */
7901 if (CONST_INT_P (element))
7902 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7903 else
7905 int byte_shift = exact_log2 (scalar_size);
7906 gcc_assert (byte_shift >= 0);
7908 if (byte_shift == 0)
7909 element_offset = element;
7911 else
7913 if (TARGET_POWERPC64)
7914 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7915 else
7916 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7918 element_offset = base_tmp;
7922 /* Create the new address pointing to the element within the vector. If we
7923 are adding 0, we don't have to change the address. */
7924 if (element_offset == const0_rtx)
7925 new_addr = addr;
7927 /* A simple indirect address can be converted into a reg + offset
7928 address. */
7929 else if (REG_P (addr) || SUBREG_P (addr))
7930 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7932 /* Optimize D-form addresses with a constant offset and a constant element
7933 number, folding the element offset into the address directly. */
7934 else if (GET_CODE (addr) == PLUS)
7936 rtx op0 = XEXP (addr, 0);
7937 rtx op1 = XEXP (addr, 1);
7938 rtx insn;
7940 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7941 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7943 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7944 rtx offset_rtx = GEN_INT (offset);
7946 if (IN_RANGE (offset, -32768, 32767)
7947 && (scalar_size < 8 || (offset & 0x3) == 0))
7948 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7949 else
7951 emit_move_insn (base_tmp, offset_rtx);
7952 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7955 else
7957 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7958 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7960 /* Note, ADDI requires the register being added to be a base
7961 register. If the register was R0, load it up into the temporary
7962 and do the add. */
7963 if (op1_reg_p
7964 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7966 insn = gen_add3_insn (base_tmp, op1, element_offset);
7967 gcc_assert (insn != NULL_RTX);
7968 emit_insn (insn);
7971 else if (ele_reg_p
7972 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7974 insn = gen_add3_insn (base_tmp, element_offset, op1);
7975 gcc_assert (insn != NULL_RTX);
7976 emit_insn (insn);
7979 else
7981 emit_move_insn (base_tmp, op1);
7982 emit_insn (gen_add2_insn (base_tmp, element_offset));
7985 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7989 else
7991 emit_move_insn (base_tmp, addr);
7992 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7995 /* If we have a PLUS, we need to see whether the particular register class
7996 allows for D-FORM or X-FORM addressing. */
7997 if (GET_CODE (new_addr) == PLUS)
7999 rtx op1 = XEXP (new_addr, 1);
8000 addr_mask_type addr_mask;
8001 int scalar_regno = regno_or_subregno (scalar_reg);
8003 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
8004 if (INT_REGNO_P (scalar_regno))
8005 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
8007 else if (FP_REGNO_P (scalar_regno))
8008 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
8010 else if (ALTIVEC_REGNO_P (scalar_regno))
8011 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
8013 else
8014 gcc_unreachable ();
8016 if (REG_P (op1) || SUBREG_P (op1))
8017 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
8018 else
8019 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
8022 else if (REG_P (new_addr) || SUBREG_P (new_addr))
8023 valid_addr_p = true;
8025 else
8026 valid_addr_p = false;
8028 if (!valid_addr_p)
8030 emit_move_insn (base_tmp, new_addr);
8031 new_addr = base_tmp;
8034 return change_address (mem, scalar_mode, new_addr);
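/* For instance, loading element 3 of a V4SImode vector at address
   r9+16 folds the 12-byte element offset into the displacement,
   giving the D-form address r9+28; a variable element number is
   instead shifted left by 2 into BASE_TMP and added.  */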
8037 /* Split a variable vec_extract operation into the component instructions. */
8039 void
8040 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
8041 rtx tmp_altivec)
8043 machine_mode mode = GET_MODE (src);
8044 machine_mode scalar_mode = GET_MODE (dest);
8045 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
8046 int byte_shift = exact_log2 (scalar_size);
8048 gcc_assert (byte_shift >= 0);
8050 /* If we are given a memory address, optimize to load just the element. We
8051 don't have to adjust the vector element number on little endian
8052 systems. */
8053 if (MEM_P (src))
8055 gcc_assert (REG_P (tmp_gpr));
8056 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
8057 tmp_gpr, scalar_mode));
8058 return;
8061 else if (REG_P (src) || SUBREG_P (src))
8063 int bit_shift = byte_shift + 3;
8064 rtx element2;
8065 int dest_regno = regno_or_subregno (dest);
8066 int src_regno = regno_or_subregno (src);
8067 int element_regno = regno_or_subregno (element);
8069 gcc_assert (REG_P (tmp_gpr));
8071 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8072 a general purpose register. */
8073 if (TARGET_P9_VECTOR
8074 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
8075 && INT_REGNO_P (dest_regno)
8076 && ALTIVEC_REGNO_P (src_regno)
8077 && INT_REGNO_P (element_regno))
8079 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
8080 rtx element_si = gen_rtx_REG (SImode, element_regno);
8082 if (mode == V16QImode)
8083 emit_insn (VECTOR_ELT_ORDER_BIG
8084 ? gen_vextublx (dest_si, element_si, src)
8085 : gen_vextubrx (dest_si, element_si, src));
8087 else if (mode == V8HImode)
8089 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8090 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
8091 emit_insn (VECTOR_ELT_ORDER_BIG
8092 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
8093 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
8097 else
8099 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8100 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8101 emit_insn (VECTOR_ELT_ORDER_BIG
8102 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8103 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8106 return;
8110 gcc_assert (REG_P (tmp_altivec));
8112 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8113 an XOR, otherwise we need to subtract. The shift amount is chosen so
8114 VSLO will shift the element into the upper position (adding 3 to convert
8115 a byte shift into a bit shift). */
8116 if (scalar_size == 8)
8118 if (!VECTOR_ELT_ORDER_BIG)
8120 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8121 element2 = tmp_gpr;
8123 else
8124 element2 = element;
8126 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8127 bit. */
8128 emit_insn (gen_rtx_SET (tmp_gpr,
8129 gen_rtx_AND (DImode,
8130 gen_rtx_ASHIFT (DImode,
8131 element2,
8132 GEN_INT (6)),
8133 GEN_INT (64))));
8135 else
8137 if (!VECTOR_ELT_ORDER_BIG)
8139 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
8141 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8142 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8143 element2 = tmp_gpr;
8145 else
8146 element2 = element;
8148 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8151 /* Get the value into the lower byte of the Altivec register where VSLO
8152 expects it. */
8153 if (TARGET_P9_VECTOR)
8154 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8155 else if (can_create_pseudo_p ())
8156 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8157 else
8159 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8160 emit_move_insn (tmp_di, tmp_gpr);
8161 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8164 /* Do the VSLO to get the value into the final location. */
8165 switch (mode)
8167 case E_V2DFmode:
8168 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8169 return;
8171 case E_V2DImode:
8172 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8173 return;
8175 case E_V4SFmode:
8177 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8178 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8179 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8180 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8181 tmp_altivec));
8183 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8184 return;
8187 case E_V4SImode:
8188 case E_V8HImode:
8189 case E_V16QImode:
8191 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8192 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8193 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8194 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8195 tmp_altivec));
8196 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8197 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
8198 GEN_INT (64 - (8 * scalar_size))));
8199 return;
8202 default:
8203 gcc_unreachable ();
8206 return;
8208 else
8209 gcc_unreachable ();
8212 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8213 two SImode values. */
8215 static void
8216 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
8218 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
8220 if (CONST_INT_P (si1) && CONST_INT_P (si2))
8222 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
8223 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
8225 emit_move_insn (dest, GEN_INT (const1 | const2));
8226 return;
8229 /* Put si1 into upper 32-bits of dest. */
8230 if (CONST_INT_P (si1))
8231 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8232 else
8234 /* Generate RLDIC. */
8235 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8236 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8237 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8238 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8239 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8240 emit_insn (gen_rtx_SET (dest, and_rtx));
8243 /* Put si2 into the temporary. */
8244 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8245 if (CONST_INT_P (si2))
8246 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8247 else
8248 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8250 /* Combine the two parts. */
8251 emit_insn (gen_iordi3 (dest, dest, tmp));
8252 return;
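/* E.g. SI1 = 1 and SI2 = 2 fold into the single DImode constant
   0x100000002, while a register SI1 instead produces the
   shift-and-mask SET above followed by an IOR with the zero-extended
   SI2 held in TMP.  */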
8255 /* Split a V4SI initialization. */
8257 void
8258 rs6000_split_v4si_init (rtx operands[])
8260 rtx dest = operands[0];
8262 /* Destination is a GPR, build up the two DImode parts in place. */
8263 if (REG_P (dest) || SUBREG_P (dest))
8265 int d_regno = regno_or_subregno (dest);
8266 rtx scalar1 = operands[1];
8267 rtx scalar2 = operands[2];
8268 rtx scalar3 = operands[3];
8269 rtx scalar4 = operands[4];
8270 rtx tmp1 = operands[5];
8271 rtx tmp2 = operands[6];
8273 /* Even though we only need one temporary (plus the destination, which
8274 has an early clobber constraint), try to use two temporaries, one for
8275 each double word created. That way the second insn scheduling pass can
8276 rearrange things so the two parts are done in parallel. */
8277 if (BYTES_BIG_ENDIAN)
8279 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8280 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8281 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8282 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8284 else
8286 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8287 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8288 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8289 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8290 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8292 return;
8295 else
8296 gcc_unreachable ();
8299 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8301 bool
8302 invalid_e500_subreg (rtx op, machine_mode mode)
8304 if (TARGET_E500_DOUBLE)
8306 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8307 subreg:TI and reg:TF. Decimal float modes are like integer
8308 modes (only low part of each register used) for this
8309 purpose. */
8310 if (GET_CODE (op) == SUBREG
8311 && (mode == SImode || mode == DImode || mode == TImode
8312 || mode == DDmode || mode == TDmode || mode == PTImode)
8313 && REG_P (SUBREG_REG (op))
8314 && (GET_MODE (SUBREG_REG (op)) == DFmode
8315 || GET_MODE (SUBREG_REG (op)) == TFmode
8316 || GET_MODE (SUBREG_REG (op)) == IFmode
8317 || GET_MODE (SUBREG_REG (op)) == KFmode))
8318 return true;
8320 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8321 reg:TI. */
8322 if (GET_CODE (op) == SUBREG
8323 && (mode == DFmode || mode == TFmode || mode == IFmode
8324 || mode == KFmode)
8325 && REG_P (SUBREG_REG (op))
8326 && (GET_MODE (SUBREG_REG (op)) == DImode
8327 || GET_MODE (SUBREG_REG (op)) == TImode
8328 || GET_MODE (SUBREG_REG (op)) == PTImode
8329 || GET_MODE (SUBREG_REG (op)) == DDmode
8330 || GET_MODE (SUBREG_REG (op)) == TDmode))
8331 return true;
8334 if (TARGET_SPE
8335 && GET_CODE (op) == SUBREG
8336 && mode == SImode
8337 && REG_P (SUBREG_REG (op))
8338 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8339 return true;
8341 return false;
8344 /* Return the alignment of TYPE. Existing alignment is ALIGN. HOW
8345 selects whether the alignment is ABI-mandated, optional, or
8346 both. */
8348 unsigned int
8349 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8351 if (how != align_opt)
8353 if (TREE_CODE (type) == VECTOR_TYPE)
8355 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8356 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8358 if (align < 64)
8359 align = 64;
8361 else if (align < 128)
8362 align = 128;
8364 else if (TARGET_E500_DOUBLE
8365 && TREE_CODE (type) == REAL_TYPE
8366 && TYPE_MODE (type) == DFmode)
8368 if (align < 64)
8369 align = 64;
8373 if (how != align_abi)
8375 if (TREE_CODE (type) == ARRAY_TYPE
8376 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8378 if (align < BITS_PER_WORD)
8379 align = BITS_PER_WORD;
8383 return align;
8386 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8387 instructions simply ignore the low bits; SPE vector memory
8388 instructions trap on unaligned accesses; VSX memory instructions are
8389 aligned to 4 or 8 bytes. */
8391 static bool
8392 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8394 return (STRICT_ALIGNMENT
8395 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8396 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8397 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
8398 && (int) align < VECTOR_ALIGN (mode)))));
8401 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8403 bool
8404 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8406 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8408 if (computed != 128)
8410 static bool warned;
8411 if (!warned && warn_psabi)
8413 warned = true;
8414 inform (input_location,
8415 "the layout of aggregates containing vectors with"
8416 " %d-byte alignment has changed in GCC 5",
8417 computed / BITS_PER_UNIT);
8420 /* In current GCC there is no special case. */
8421 return false;
8424 return false;
8427 /* AIX increases natural record alignment to doubleword if the first
8428 field is an FP double while the FP fields remain word aligned. */
8430 unsigned int
8431 rs6000_special_round_type_align (tree type, unsigned int computed,
8432 unsigned int specified)
8434 unsigned int align = MAX (computed, specified);
8435 tree field = TYPE_FIELDS (type);
8437 /* Skip all non field decls */
8438 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8439 field = DECL_CHAIN (field);
8441 if (field != NULL && field != type)
8443 type = TREE_TYPE (field);
8444 while (TREE_CODE (type) == ARRAY_TYPE)
8445 type = TREE_TYPE (type);
8447 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8448 align = MAX (align, 64);
8451 return align;
8454 /* Darwin increases record alignment to the natural alignment of
8455 the first field. */
8457 unsigned int
8458 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8459 unsigned int specified)
8461 unsigned int align = MAX (computed, specified);
8463 if (TYPE_PACKED (type))
8464 return align;
8466 /* Find the first field, looking down into aggregates. */
8467 do {
8468 tree field = TYPE_FIELDS (type);
8469 /* Skip all non field decls */
8470 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8471 field = DECL_CHAIN (field);
8472 if (! field)
8473 break;
8474 /* A packed field does not contribute any extra alignment. */
8475 if (DECL_PACKED (field))
8476 return align;
8477 type = TREE_TYPE (field);
8478 while (TREE_CODE (type) == ARRAY_TYPE)
8479 type = TREE_TYPE (type);
8480 } while (AGGREGATE_TYPE_P (type));
8482 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8483 align = MAX (align, TYPE_ALIGN (type));
8485 return align;
8488 /* Return 1 for an operand in small memory on V.4/eabi. */
8490 int
8491 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8492 machine_mode mode ATTRIBUTE_UNUSED)
8494 #if TARGET_ELF
8495 rtx sym_ref;
8497 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8498 return 0;
8500 if (DEFAULT_ABI != ABI_V4)
8501 return 0;
8503 /* Vector and float memory instructions have a limited offset on the
8504 SPE, so using a vector or float variable directly as an operand is
8505 not useful. */
8506 if (TARGET_SPE
8507 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8508 return 0;
8510 if (GET_CODE (op) == SYMBOL_REF)
8511 sym_ref = op;
8513 else if (GET_CODE (op) != CONST
8514 || GET_CODE (XEXP (op, 0)) != PLUS
8515 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8516 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8517 return 0;
8519 else
8521 rtx sum = XEXP (op, 0);
8522 HOST_WIDE_INT summand;
8524 /* We have to be careful here, because it is the referenced address
8525 that must be 32k from _SDA_BASE_, not just the symbol. */
8526 summand = INTVAL (XEXP (sum, 1));
8527 if (summand < 0 || summand > g_switch_value)
8528 return 0;
8530 sym_ref = XEXP (sum, 0);
8533 return SYMBOL_REF_SMALL_P (sym_ref);
8534 #else
8535 return 0;
8536 #endif
8539 /* Return true if either operand is a general purpose register. */
8541 bool
8542 gpr_or_gpr_p (rtx op0, rtx op1)
8544 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8545 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8548 /* Return true if this is a direct move operation between GPR registers and
8549 floating point/VSX registers. */
8551 bool
8552 direct_move_p (rtx op0, rtx op1)
8554 int regno0, regno1;
8556 if (!REG_P (op0) || !REG_P (op1))
8557 return false;
8559 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8560 return false;
8562 regno0 = REGNO (op0);
8563 regno1 = REGNO (op1);
8564 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8565 return false;
8567 if (INT_REGNO_P (regno0))
8568 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8570 else if (INT_REGNO_P (regno1))
8572 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8573 return true;
8575 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8576 return true;
8579 return false;
8582 /* Return true if the OFFSET is valid for the quad address instructions that
8583 use d-form (register + offset) addressing. */
8585 static inline bool
8586 quad_address_offset_p (HOST_WIDE_INT offset)
8588 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
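/* E.g. offsets 0, 16 and -32768 are acceptable, while 8 (not 16-byte
   aligned) and 32768 (outside the signed 16-bit range) are not.  */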
8591 /* Return true if ADDR is an acceptable address for a quad memory
8592 operation of mode MODE (either LQ/STQ for general purpose registers, or
8593 LXV/STXV for vector registers under ISA 3.0). STRICT selects whether
8594 strict register checking is applied to the base register. */
8597 bool
8598 quad_address_p (rtx addr, machine_mode mode, bool strict)
8600 rtx op0, op1;
8602 if (GET_MODE_SIZE (mode) != 16)
8603 return false;
8605 if (legitimate_indirect_address_p (addr, strict))
8606 return true;
8608 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8609 return false;
8611 if (GET_CODE (addr) != PLUS)
8612 return false;
8614 op0 = XEXP (addr, 0);
8615 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8616 return false;
8618 op1 = XEXP (addr, 1);
8619 if (!CONST_INT_P (op1))
8620 return false;
8622 return quad_address_offset_p (INTVAL (op1));
8625 /* Return true if this is a load or store quad operation. This function does
8626 not handle the atomic quad memory instructions. */
8628 bool
8629 quad_load_store_p (rtx op0, rtx op1)
8631 bool ret;
8633 if (!TARGET_QUAD_MEMORY)
8634 ret = false;
8636 else if (REG_P (op0) && MEM_P (op1))
8637 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8638 && quad_memory_operand (op1, GET_MODE (op1))
8639 && !reg_overlap_mentioned_p (op0, op1));
8641 else if (MEM_P (op0) && REG_P (op1))
8642 ret = (quad_memory_operand (op0, GET_MODE (op0))
8643 && quad_int_reg_operand (op1, GET_MODE (op1)));
8645 else
8646 ret = false;
8648 if (TARGET_DEBUG_ADDR)
8650 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8651 ret ? "true" : "false");
8652 debug_rtx (gen_rtx_SET (op0, op1));
8655 return ret;
8658 /* Given an address, return a constant offset term if one exists. */
8660 static rtx
8661 address_offset (rtx op)
8663 if (GET_CODE (op) == PRE_INC
8664 || GET_CODE (op) == PRE_DEC)
8665 op = XEXP (op, 0);
8666 else if (GET_CODE (op) == PRE_MODIFY
8667 || GET_CODE (op) == LO_SUM)
8668 op = XEXP (op, 1);
8670 if (GET_CODE (op) == CONST)
8671 op = XEXP (op, 0);
8673 if (GET_CODE (op) == PLUS)
8674 op = XEXP (op, 1);
8676 if (CONST_INT_P (op))
8677 return op;
8679 return NULL_RTX;
8682 /* Return true if the MEM operand is a memory operand suitable for use
8683 with a (full width, possibly multiple) gpr load/store. On
8684 powerpc64 this means the offset must be divisible by 4.
8685 Implements 'Y' constraint.
8687 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8688 a constraint function we know the operand has satisfied a suitable
8689 memory predicate. Also accept some odd rtl generated by reload
8690 (see rs6000_legitimize_reload_address for various forms). It is
8691 important that reload rtl be accepted by appropriate constraints
8692 but not by the operand predicate.
8694 Offsetting a lo_sum should not be allowed, except where we know by
8695 alignment that a 32k boundary is not crossed, but see the ???
8696 comment in rs6000_legitimize_reload_address. Note that by
8697 "offsetting" here we mean a further offset to access parts of the
8698 MEM. It's fine to have a lo_sum where the inner address is offset
8699 from a sym, since the same sym+offset will appear in the high part
8700 of the address calculation. */
8702 bool
8703 mem_operand_gpr (rtx op, machine_mode mode)
8705 unsigned HOST_WIDE_INT offset;
8706 int extra;
8707 rtx addr = XEXP (op, 0);
8709 op = address_offset (addr);
8710 if (op == NULL_RTX)
8711 return true;
8713 offset = INTVAL (op);
8714 if (TARGET_POWERPC64 && (offset & 3) != 0)
8715 return false;
8717 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8718 if (extra < 0)
8719 extra = 0;
8721 if (GET_CODE (addr) == LO_SUM)
8722 /* For lo_sum addresses, we must allow any offset except one that
8723 causes a wrap, so test only the low 16 bits. */
8724 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8726 return offset + 0x8000 < 0x10000u - extra;
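/* E.g. for a 16-byte access on powerpc64, EXTRA is 8 and reg+32760 is
   rejected: the second doubleword would need offset 32768, which does
   not fit in the signed 16-bit displacement field.  */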
8729 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8730 enforce an offset divisible by 4 even for 32-bit. */
8732 bool
8733 mem_operand_ds_form (rtx op, machine_mode mode)
8735 unsigned HOST_WIDE_INT offset;
8736 int extra;
8737 rtx addr = XEXP (op, 0);
8739 if (!offsettable_address_p (false, mode, addr))
8740 return false;
8742 op = address_offset (addr);
8743 if (op == NULL_RTX)
8744 return true;
8746 offset = INTVAL (op);
8747 if ((offset & 3) != 0)
8748 return false;
8750 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8751 if (extra < 0)
8752 extra = 0;
8754 if (GET_CODE (addr) == LO_SUM)
8755 /* For lo_sum addresses, we must allow any offset except one that
8756 causes a wrap, so test only the low 16 bits. */
8757 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8759 return offset + 0x8000 < 0x10000u - extra;
8762 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8764 static bool
8765 reg_offset_addressing_ok_p (machine_mode mode)
8767 switch (mode)
8769 case E_V16QImode:
8770 case E_V8HImode:
8771 case E_V4SFmode:
8772 case E_V4SImode:
8773 case E_V2DFmode:
8774 case E_V2DImode:
8775 case E_V1TImode:
8776 case E_TImode:
8777 case E_TFmode:
8778 case E_KFmode:
8779 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8780 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8781 a vector mode, if we want to use the VSX registers to move it around,
8782 we need to restrict ourselves to reg+reg addressing. Similarly for
8783 IEEE 128-bit floating point that is passed in a single vector
8784 register. */
8785 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8786 return mode_supports_vsx_dform_quad (mode);
8787 break;
8789 case E_V4HImode:
8790 case E_V2SImode:
8791 case E_V1DImode:
8792 case E_V2SFmode:
8793 /* Paired vector modes. Only reg+reg addressing is valid. */
8794 if (TARGET_PAIRED_FLOAT)
8795 return false;
8796 break;
8798 case E_SDmode:
8799 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8800 addressing for the LFIWZX and STFIWX instructions. */
8801 if (TARGET_NO_SDMODE_STACK)
8802 return false;
8803 break;
8805 default:
8806 break;
8809 return true;
8812 static bool
8813 virtual_stack_registers_memory_p (rtx op)
8815 int regnum;
8817 if (GET_CODE (op) == REG)
8818 regnum = REGNO (op);
8820 else if (GET_CODE (op) == PLUS
8821 && GET_CODE (XEXP (op, 0)) == REG
8822 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8823 regnum = REGNO (XEXP (op, 0));
8825 else
8826 return false;
8828 return (regnum >= FIRST_VIRTUAL_REGISTER
8829 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8832 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8833 is known to not straddle a 32k boundary. This function is used
8834 to determine whether -mcmodel=medium code can use TOC pointer
8835 relative addressing for OP. This means the alignment of the TOC
8836 pointer must also be taken into account, and unfortunately that is
8837 only 8 bytes. */
8839 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8840 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8841 #endif
8843 static bool
8844 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8845 machine_mode mode)
8847 tree decl;
8848 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8850 if (GET_CODE (op) != SYMBOL_REF)
8851 return false;
8853 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8854 SYMBOL_REF. */
8855 if (mode_supports_vsx_dform_quad (mode))
8856 return false;
8858 dsize = GET_MODE_SIZE (mode);
8859 decl = SYMBOL_REF_DECL (op);
8860 if (!decl)
8862 if (dsize == 0)
8863 return false;
8865 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8866 replacing memory addresses with an anchor plus offset. We
8867 could find the decl by rummaging around in the block->objects
8868 VEC for the given offset but that seems like too much work. */
8869 dalign = BITS_PER_UNIT;
8870 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8871 && SYMBOL_REF_ANCHOR_P (op)
8872 && SYMBOL_REF_BLOCK (op) != NULL)
8874 struct object_block *block = SYMBOL_REF_BLOCK (op);
8876 dalign = block->alignment;
8877 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8879 else if (CONSTANT_POOL_ADDRESS_P (op))
8881 /* It would be nice to have get_pool_align()... */
8882 machine_mode cmode = get_pool_mode (op);
8884 dalign = GET_MODE_ALIGNMENT (cmode);
8887 else if (DECL_P (decl))
8889 dalign = DECL_ALIGN (decl);
8891 if (dsize == 0)
8893 /* Allow BLKmode when the entire object is known to not
8894 cross a 32k boundary. */
8895 if (!DECL_SIZE_UNIT (decl))
8896 return false;
8898 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8899 return false;
8901 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8902 if (dsize > 32768)
8903 return false;
8905 dalign /= BITS_PER_UNIT;
8906 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8907 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8908 return dalign >= dsize;
8911 else
8912 gcc_unreachable ();
8914 /* Find how many bits of the alignment we know for this access. */
8915 dalign /= BITS_PER_UNIT;
8916 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8917 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8918 mask = dalign - 1;
8919 lsb = offset & -offset;
8920 mask &= lsb - 1;
8921 dalign = mask + 1;
8923 return dalign >= dsize;
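/* E.g. a decl aligned to 8 bytes accessed at OFFSET = 4 is known
   aligned only to 4 bytes (the low set bit of the offset), so at most
   a 4-byte access is guaranteed not to cross a 32k boundary.  */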
8926 static bool
8927 constant_pool_expr_p (rtx op)
8929 rtx base, offset;
8931 split_const (op, &base, &offset);
8932 return (GET_CODE (base) == SYMBOL_REF
8933 && CONSTANT_POOL_ADDRESS_P (base)
8934 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8937 static const_rtx tocrel_base, tocrel_offset;
8939 /* Return true if OP is a toc pointer relative address (the output
8940 of create_TOC_reference). If STRICT, do not match non-split
8941 -mcmodel=large/medium toc pointer relative addresses. */
8943 bool
8944 toc_relative_expr_p (const_rtx op, bool strict)
8946 if (!TARGET_TOC)
8947 return false;
8949 if (TARGET_CMODEL != CMODEL_SMALL)
8951 /* When strict, ensure we have everything tidy. */
8952 if (strict
8953 && !(GET_CODE (op) == LO_SUM
8954 && REG_P (XEXP (op, 0))
8955 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8956 return false;
8958 /* When not strict, allow non-split TOC addresses and also allow
8959 (lo_sum (high ..)) TOC addresses created during reload. */
8960 if (GET_CODE (op) == LO_SUM)
8961 op = XEXP (op, 1);
8964 tocrel_base = op;
8965 tocrel_offset = const0_rtx;
8966 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8968 tocrel_base = XEXP (op, 0);
8969 tocrel_offset = XEXP (op, 1);
8972 return (GET_CODE (tocrel_base) == UNSPEC
8973 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8976 /* Return true if X is a constant pool address, and also for cmodel=medium
8977 if X is a toc-relative address known to be offsettable within MODE. */
8979 bool
8980 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8981 bool strict)
8983 return (toc_relative_expr_p (x, strict)
8984 && (TARGET_CMODEL != CMODEL_MEDIUM
8985 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8986 || mode == QImode
8987 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8988 INTVAL (tocrel_offset), mode)));
8991 static bool
8992 legitimate_small_data_p (machine_mode mode, rtx x)
8994 return (DEFAULT_ABI == ABI_V4
8995 && !flag_pic && !TARGET_TOC
8996 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8997 && small_data_operand (x, mode));
9000 /* SPE offset addressing is limited to 5-bits worth of double words. */
9001 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
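/* I.e. the only valid SPE offsets are the multiples of 8 from 0 to
   248, as evldd/evstdd encode a 5-bit count of double words.  */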
9003 bool
9004 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
9005 bool strict, bool worst_case)
9007 unsigned HOST_WIDE_INT offset;
9008 unsigned int extra;
9010 if (GET_CODE (x) != PLUS)
9011 return false;
9012 if (!REG_P (XEXP (x, 0)))
9013 return false;
9014 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9015 return false;
9016 if (mode_supports_vsx_dform_quad (mode))
9017 return quad_address_p (x, mode, strict);
9018 if (!reg_offset_addressing_ok_p (mode))
9019 return virtual_stack_registers_memory_p (x);
9020 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
9021 return true;
9022 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
9023 return false;
9025 offset = INTVAL (XEXP (x, 1));
9026 extra = 0;
9027 switch (mode)
9029 case E_V4HImode:
9030 case E_V2SImode:
9031 case E_V1DImode:
9032 case E_V2SFmode:
9033 /* SPE vector modes. */
9034 return SPE_CONST_OFFSET_OK (offset);
9036 case E_DFmode:
9037 case E_DDmode:
9038 case E_DImode:
9039 /* On e500v2, we may have:
9041 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
9043 which gets addressed with evldd instructions. */
9044 if (TARGET_E500_DOUBLE)
9045 return SPE_CONST_OFFSET_OK (offset);
9047 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9048 addressing. */
9049 if (VECTOR_MEM_VSX_P (mode))
9050 return false;
9052 if (!worst_case)
9053 break;
9054 if (!TARGET_POWERPC64)
9055 extra = 4;
9056 else if (offset & 3)
9057 return false;
9058 break;
9060 case E_TFmode:
9061 case E_IFmode:
9062 case E_KFmode:
9063 case E_TDmode:
9064 case E_TImode:
9065 case E_PTImode:
9066 if (TARGET_E500_DOUBLE)
9067 return (SPE_CONST_OFFSET_OK (offset)
9068 && SPE_CONST_OFFSET_OK (offset + 8));
9070 extra = 8;
9071 if (!worst_case)
9072 break;
9073 if (!TARGET_POWERPC64)
9074 extra = 12;
9075 else if (offset & 3)
9076 return false;
9077 break;
9079 default:
9080 break;
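/* The closing test below is a branch-free range check: biasing by
   0x8000 maps the signed 16-bit displacement range [-0x8000, 0x7fff]
   onto [0, 0xffff], so a single unsigned compare verifies that both
   the offset and offset + extra fit in a signed 16-bit immediate.
   E.g. offset 0x7ff8 with extra = 8 biases to 0xfff8 and is
   rejected, since the last word of the access would need
   displacement 0x8000.  */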
9083 offset += 0x8000;
9084 return offset < 0x10000 - extra;
9087 bool
9088 legitimate_indexed_address_p (rtx x, int strict)
9090 rtx op0, op1;
9092 if (GET_CODE (x) != PLUS)
9093 return false;
9095 op0 = XEXP (x, 0);
9096 op1 = XEXP (x, 1);
9098 /* Recognize the rtl generated by reload which we know will later be
9099 replaced with proper base and index regs. */
9100 if (!strict
9101 && reload_in_progress
9102 && (REG_P (op0) || GET_CODE (op0) == PLUS)
9103 && REG_P (op1))
9104 return true;
9106 return (REG_P (op0) && REG_P (op1)
9107 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9108 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9109 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9110 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9113 bool
9114 avoiding_indexed_address_p (machine_mode mode)
9116 /* Avoid indexed addressing for modes that have non-indexed
9117 load/store instruction forms. */
9118 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9121 bool
9122 legitimate_indirect_address_p (rtx x, int strict)
9124 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
9127 bool
9128 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9130 if (!TARGET_MACHO || !flag_pic
9131 || mode != SImode || GET_CODE (x) != MEM)
9132 return false;
9133 x = XEXP (x, 0);
9135 if (GET_CODE (x) != LO_SUM)
9136 return false;
9137 if (GET_CODE (XEXP (x, 0)) != REG)
9138 return false;
9139 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9140 return false;
9141 x = XEXP (x, 1);
9143 return CONSTANT_P (x);
9146 static bool
9147 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9149 if (GET_CODE (x) != LO_SUM)
9150 return false;
9151 if (GET_CODE (XEXP (x, 0)) != REG)
9152 return false;
9153 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9154 return false;
9155 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
9156 if (mode_supports_vsx_dform_quad (mode))
9157 return false;
9158 /* Restrict addressing for DI because of our SUBREG hackery. */
9159 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9160 return false;
9161 x = XEXP (x, 1);
9163 if (TARGET_ELF || TARGET_MACHO)
9165 bool large_toc_ok;
9167 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9168 return false;
9169 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that usually calls
9170 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9171 recognizes some LO_SUM addresses as valid although this
9172 function says the opposite. In most cases, LRA can generate
9173 correct code for address reloads through its various
9174 transformations; only some LO_SUM cases defeat it. So we need
9175 to add code analogous to that in rs6000_legitimize_reload_address
9176 for LO_SUM here, saying that some addresses are still valid. */
9177 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9178 && small_toc_ref (x, VOIDmode));
9179 if (TARGET_TOC && ! large_toc_ok)
9180 return false;
9181 if (GET_MODE_NUNITS (mode) != 1)
9182 return false;
9183 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9184 && !(/* ??? Assume floating point reg based on mode? */
9185 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
9186 && (mode == DFmode || mode == DDmode)))
9187 return false;
9189 return CONSTANT_P (x) || large_toc_ok;
9192 return false;
9196 /* Try machine-dependent ways of modifying an illegitimate address
9197 to be legitimate. If we find one, return the new, valid address.
9198 This is used from only one place: `memory_address' in explow.c.
9200 OLDX is the address as it was before break_out_memory_refs was
9201 called. In some cases it is useful to look at this to decide what
9202 needs to be done.
9204 It is always safe for this function to do nothing. It exists to
9205 recognize opportunities to optimize the output.
9207 On RS/6000, first check for the sum of a register with a constant
9208 integer that is out of range. If so, generate code to add the
9209 constant with the low-order 16 bits masked to the register and force
9210 this result into another register (this can be done with `cau').
9211 Then generate an address of REG+(CONST&0xffff), allowing for the
9212 possibility of bit 16 being a one.
9214 Then check for the sum of a register and something not constant, try to
9215 load the other things into a register and return the sum. */
9217 static rtx
9218 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9219 machine_mode mode)
9221 unsigned int extra;
9223 if (!reg_offset_addressing_ok_p (mode)
9224 || mode_supports_vsx_dform_quad (mode))
9226 if (virtual_stack_registers_memory_p (x))
9227 return x;
9229 /* In theory we should not be seeing addresses of the form reg+0,
9230 but just in case it is generated, optimize it away. */
9231 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9232 return force_reg (Pmode, XEXP (x, 0));
9234 /* For TImode with load/store quad, restrict addresses to just a single
9235 pointer, so it works with both GPRs and VSX registers. */
9236 /* Make sure both operands are registers. */
9237 else if (GET_CODE (x) == PLUS
9238 && (mode != TImode || !TARGET_VSX_TIMODE))
9239 return gen_rtx_PLUS (Pmode,
9240 force_reg (Pmode, XEXP (x, 0)),
9241 force_reg (Pmode, XEXP (x, 1)));
9242 else
9243 return force_reg (Pmode, x);
9245 if (GET_CODE (x) == SYMBOL_REF)
9247 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9248 if (model != 0)
9249 return rs6000_legitimize_tls_address (x, model);
9252 extra = 0;
9253 switch (mode)
9255 case E_TFmode:
9256 case E_TDmode:
9257 case E_TImode:
9258 case E_PTImode:
9259 case E_IFmode:
9260 case E_KFmode:
9261 /* As in legitimate_offset_address_p we do not assume
9262 worst-case. The mode here is just a hint as to the registers
9263 used. A TImode is usually in gprs, but may actually be in
9264 fprs. Leave worst-case scenario for reload to handle via
9265 insn constraints. PTImode is only GPRs. */
9266 extra = 8;
9267 break;
9268 default:
9269 break;
9272 if (GET_CODE (x) == PLUS
9273 && GET_CODE (XEXP (x, 0)) == REG
9274 && GET_CODE (XEXP (x, 1)) == CONST_INT
9275 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9276 >= 0x10000 - extra)
9277 && !(SPE_VECTOR_MODE (mode)
9278 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
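/* A worked example (sketch): for (plus (reg) (const_int 0x12345)) we
   get low_int = 0x2345 and high_int = 0x10000, so we emit
   addis tmp,reg,1 and return (plus tmp 0x2345), whose displacement
   now fits in 16 bits.  */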
9280 HOST_WIDE_INT high_int, low_int;
9281 rtx sum;
9282 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9283 if (low_int >= 0x8000 - extra)
9284 low_int = 0;
9285 high_int = INTVAL (XEXP (x, 1)) - low_int;
9286 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9287 GEN_INT (high_int)), 0);
9288 return plus_constant (Pmode, sum, low_int);
9290 else if (GET_CODE (x) == PLUS
9291 && GET_CODE (XEXP (x, 0)) == REG
9292 && GET_CODE (XEXP (x, 1)) != CONST_INT
9293 && GET_MODE_NUNITS (mode) == 1
9294 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9295 || (/* ??? Assume floating point reg based on mode? */
9296 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9297 && (mode == DFmode || mode == DDmode)))
9298 && !avoiding_indexed_address_p (mode))
9300 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9301 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9303 else if (SPE_VECTOR_MODE (mode)
9304 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9306 if (mode == DImode)
9307 return x;
9308 /* We accept [reg + reg] and [reg + OFFSET]. */
9310 if (GET_CODE (x) == PLUS)
9312 rtx op1 = XEXP (x, 0);
9313 rtx op2 = XEXP (x, 1);
9314 rtx y;
9316 op1 = force_reg (Pmode, op1);
9318 if (GET_CODE (op2) != REG
9319 && (GET_CODE (op2) != CONST_INT
9320 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9321 || (GET_MODE_SIZE (mode) > 8
9322 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9323 op2 = force_reg (Pmode, op2);
9325 /* We can't always do [reg + reg] for these, because [reg +
9326 reg + offset] is not a legitimate addressing mode. */
9327 y = gen_rtx_PLUS (Pmode, op1, op2);
9329 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9330 return force_reg (Pmode, y);
9331 else
9332 return y;
9335 return force_reg (Pmode, x);
9337 else if ((TARGET_ELF
9338 #if TARGET_MACHO
9339 || !MACHO_DYNAMIC_NO_PIC_P
9340 #endif
9342 && TARGET_32BIT
9343 && TARGET_NO_TOC
9344 && ! flag_pic
9345 && GET_CODE (x) != CONST_INT
9346 && GET_CODE (x) != CONST_WIDE_INT
9347 && GET_CODE (x) != CONST_DOUBLE
9348 && CONSTANT_P (x)
9349 && GET_MODE_NUNITS (mode) == 1
9350 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9351 || (/* ??? Assume floating point reg based on mode? */
9352 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9353 && (mode == DFmode || mode == DDmode))))
9355 rtx reg = gen_reg_rtx (Pmode);
9356 if (TARGET_ELF)
9357 emit_insn (gen_elf_high (reg, x));
9358 else
9359 emit_insn (gen_macho_high (reg, x));
9360 return gen_rtx_LO_SUM (Pmode, reg, x);
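/* That is, materialize the high part with lis reg,x@ha (elf_high)
   and let the LO_SUM supply x@l in the eventual memory access -- a
   sketch of the 32-bit no-TOC ELF sequence; Darwin uses the
   analogous macho_high/lo_sum forms.  */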
9362 else if (TARGET_TOC
9363 && GET_CODE (x) == SYMBOL_REF
9364 && constant_pool_expr_p (x)
9365 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9366 return create_TOC_reference (x, NULL_RTX);
9367 else
9368 return x;
9371 /* Debug version of rs6000_legitimize_address. */
9372 static rtx
9373 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9375 rtx ret;
9376 rtx_insn *insns;
9378 start_sequence ();
9379 ret = rs6000_legitimize_address (x, oldx, mode);
9380 insns = get_insns ();
9381 end_sequence ();
9383 if (ret != x)
9385 fprintf (stderr,
9386 "\nrs6000_legitimize_address: mode %s, old code %s, "
9387 "new code %s, modified\n",
9388 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9389 GET_RTX_NAME (GET_CODE (ret)));
9391 fprintf (stderr, "Original address:\n");
9392 debug_rtx (x);
9394 fprintf (stderr, "oldx:\n");
9395 debug_rtx (oldx);
9397 fprintf (stderr, "New address:\n");
9398 debug_rtx (ret);
9400 if (insns)
9402 fprintf (stderr, "Insns added:\n");
9403 debug_rtx_list (insns, 20);
9406 else
9408 fprintf (stderr,
9409 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9410 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9412 debug_rtx (x);
9415 if (insns)
9416 emit_insn (insns);
9418 return ret;
9421 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9422 We need to emit DTP-relative relocations. */
9424 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9425 static void
9426 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9428 switch (size)
9430 case 4:
9431 fputs ("\t.long\t", file);
9432 break;
9433 case 8:
9434 fputs (DOUBLE_INT_ASM_OP, file);
9435 break;
9436 default:
9437 gcc_unreachable ();
9439 output_addr_const (file, x);
9440 if (TARGET_ELF)
9441 fputs ("@dtprel+0x8000", file);
9442 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9444 switch (SYMBOL_REF_TLS_MODEL (x))
9446 case 0:
9447 break;
9448 case TLS_MODEL_LOCAL_EXEC:
9449 fputs ("@le", file);
9450 break;
9451 case TLS_MODEL_INITIAL_EXEC:
9452 fputs ("@ie", file);
9453 break;
9454 case TLS_MODEL_GLOBAL_DYNAMIC:
9455 case TLS_MODEL_LOCAL_DYNAMIC:
9456 fputs ("@m", file);
9457 break;
9458 default:
9459 gcc_unreachable ();
9464 /* Return true if X is a symbol that refers to real (rather than emulated)
9465 TLS. */
9467 static bool
9468 rs6000_real_tls_symbol_ref_p (rtx x)
9470 return (GET_CODE (x) == SYMBOL_REF
9471 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9474 /* In the name of slightly smaller debug output, and to cater to
9475 general assembler lossage, recognize various UNSPEC sequences
9476 and turn them back into a direct symbol reference. */
9478 static rtx
9479 rs6000_delegitimize_address (rtx orig_x)
9481 rtx x, y, offset;
9483 orig_x = delegitimize_mem_from_attrs (orig_x);
9484 x = orig_x;
9485 if (MEM_P (x))
9486 x = XEXP (x, 0);
9488 y = x;
9489 if (TARGET_CMODEL != CMODEL_SMALL
9490 && GET_CODE (y) == LO_SUM)
9491 y = XEXP (y, 1);
9493 offset = NULL_RTX;
9494 if (GET_CODE (y) == PLUS
9495 && GET_MODE (y) == Pmode
9496 && CONST_INT_P (XEXP (y, 1)))
9498 offset = XEXP (y, 1);
9499 y = XEXP (y, 0);
9502 if (GET_CODE (y) == UNSPEC
9503 && XINT (y, 1) == UNSPEC_TOCREL)
9505 y = XVECEXP (y, 0, 0);
9507 #ifdef HAVE_AS_TLS
9508 /* Do not associate thread-local symbols with the original
9509 constant pool symbol. */
9510 if (TARGET_XCOFF
9511 && GET_CODE (y) == SYMBOL_REF
9512 && CONSTANT_POOL_ADDRESS_P (y)
9513 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9514 return orig_x;
9515 #endif
9517 if (offset != NULL_RTX)
9518 y = gen_rtx_PLUS (Pmode, y, offset);
9519 if (!MEM_P (orig_x))
9520 return y;
9521 else
9522 return replace_equiv_address_nv (orig_x, y);
9525 if (TARGET_MACHO
9526 && GET_CODE (orig_x) == LO_SUM
9527 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9529 y = XEXP (XEXP (orig_x, 1), 0);
9530 if (GET_CODE (y) == UNSPEC
9531 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9532 return XVECEXP (y, 0, 0);
9535 return orig_x;
9538 /* Return true if X shouldn't be emitted into the debug info.
9539 The linker doesn't like .toc section references from
9540 .debug_* sections, so reject .toc section symbols. */
9542 static bool
9543 rs6000_const_not_ok_for_debug_p (rtx x)
9545 if (GET_CODE (x) == UNSPEC)
9546 return true;
9547 if (GET_CODE (x) == SYMBOL_REF
9548 && CONSTANT_POOL_ADDRESS_P (x))
9550 rtx c = get_pool_constant (x);
9551 machine_mode cmode = get_pool_mode (x);
9552 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9553 return true;
9556 return false;
9560 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9562 static bool
9563 rs6000_legitimate_combined_insn (rtx_insn *insn)
9565 int icode = INSN_CODE (insn);
9567 /* Reject creating doloop insns. Combine should not be allowed
9568 to create these for a number of reasons:
9569 1) In a nested loop, if combine creates one of these in an
9570 outer loop and the register allocator happens to allocate ctr
9571 to the outer loop insn, then the inner loop can't use ctr.
9572 Inner loops ought to be more highly optimized.
9573 2) Combine often wants to create one of these from what was
9574 originally a three insn sequence, first combining the three
9575 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9576 allocated ctr, the splitter takes us back to the three insn
9577 sequence. It's better to stop combine at the two insn
9578 sequence.
9579 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9580 insns, the register allocator sometimes uses floating point
9581 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9582 jump insn and output reloads are not implemented for jumps,
9583 the ctrsi/ctrdi splitters need to handle all possible cases.
9584 That's a pain, and it gets to be seriously difficult when a
9585 splitter that runs after reload needs memory to transfer from
9586 a gpr to an fpr. See PR70098 and PR71763, which are not fixed
9587 for the difficult case. It's better to not create problems
9588 in the first place. */
9589 if (icode != CODE_FOR_nothing
9590 && (icode == CODE_FOR_ctrsi_internal1
9591 || icode == CODE_FOR_ctrdi_internal1
9592 || icode == CODE_FOR_ctrsi_internal2
9593 || icode == CODE_FOR_ctrdi_internal2
9594 || icode == CODE_FOR_ctrsi_internal3
9595 || icode == CODE_FOR_ctrdi_internal3
9596 || icode == CODE_FOR_ctrsi_internal4
9597 || icode == CODE_FOR_ctrdi_internal4))
9598 return false;
9600 return true;
9603 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9605 static GTY(()) rtx rs6000_tls_symbol;
9606 static rtx
9607 rs6000_tls_get_addr (void)
9609 if (!rs6000_tls_symbol)
9610 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9612 return rs6000_tls_symbol;
9615 /* Construct the SYMBOL_REF for TLS GOT references. */
9617 static GTY(()) rtx rs6000_got_symbol;
9618 static rtx
9619 rs6000_got_sym (void)
9621 if (!rs6000_got_symbol)
9623 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9624 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9625 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9628 return rs6000_got_symbol;
9631 /* AIX Thread-Local Address support. */
9633 static rtx
9634 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9636 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9637 const char *name;
9638 char *tlsname;
9640 name = XSTR (addr, 0);
9641 /* Append the TLS CSECT qualifier, unless the symbol is already
9642 qualified or will be placed in the TLS private data section. */
9643 if (name[strlen (name) - 1] != ']'
9644 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9645 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9647 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9648 strcpy (tlsname, name);
9649 strcat (tlsname,
9650 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9651 tlsaddr = copy_rtx (addr);
9652 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9654 else
9655 tlsaddr = addr;
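/* When the mangling applies, a public symbol "foo" is thus rewritten
   as "foo[TL]", or as "foo[UL]" for a BSS-style (zero) initializer,
   matching the XCOFF CSECT naming conventions.  */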
9657 /* Place addr into TOC constant pool. */
9658 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9660 /* Output the TOC entry and create the MEM referencing the value. */
9661 if (constant_pool_expr_p (XEXP (sym, 0))
9662 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9664 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9665 mem = gen_const_mem (Pmode, tocref);
9666 set_mem_alias_set (mem, get_TOC_alias_set ());
9668 else
9669 return sym;
9671 /* Use global-dynamic for local-dynamic. */
9672 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9673 || model == TLS_MODEL_LOCAL_DYNAMIC)
9675 /* Create new TOC reference for @m symbol. */
9676 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9677 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9678 strcpy (tlsname, "*LCM");
9679 strcat (tlsname, name + 3);
9680 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9681 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9682 tocref = create_TOC_reference (modaddr, NULL_RTX);
9683 rtx modmem = gen_const_mem (Pmode, tocref);
9684 set_mem_alias_set (modmem, get_TOC_alias_set ());
9686 rtx modreg = gen_reg_rtx (Pmode);
9687 emit_insn (gen_rtx_SET (modreg, modmem));
9689 tmpreg = gen_reg_rtx (Pmode);
9690 emit_insn (gen_rtx_SET (tmpreg, mem));
9692 dest = gen_reg_rtx (Pmode);
9693 if (TARGET_32BIT)
9694 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9695 else
9696 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9697 return dest;
9699 /* Obtain the TLS pointer: a call on 32-bit, GPR 13 on 64-bit. */
9700 else if (TARGET_32BIT)
9702 tlsreg = gen_reg_rtx (SImode);
9703 emit_insn (gen_tls_get_tpointer (tlsreg));
9705 else
9706 tlsreg = gen_rtx_REG (DImode, 13);
9708 /* Load the TOC value into temporary register. */
9709 tmpreg = gen_reg_rtx (Pmode);
9710 emit_insn (gen_rtx_SET (tmpreg, mem));
9711 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9712 gen_rtx_MINUS (Pmode, addr, tlsreg));
9714 /* Add TOC symbol value to TLS pointer. */
9715 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9717 return dest;
9720 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9721 this (thread-local) address. */
9723 static rtx
9724 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9726 rtx dest, insn;
9728 if (TARGET_XCOFF)
9729 return rs6000_legitimize_tls_address_aix (addr, model);
9731 dest = gen_reg_rtx (Pmode);
9732 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9734 rtx tlsreg;
9736 if (TARGET_64BIT)
9738 tlsreg = gen_rtx_REG (Pmode, 13);
9739 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9741 else
9743 tlsreg = gen_rtx_REG (Pmode, 2);
9744 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9746 emit_insn (insn);
9748 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9750 rtx tlsreg, tmp;
9752 tmp = gen_reg_rtx (Pmode);
9753 if (TARGET_64BIT)
9755 tlsreg = gen_rtx_REG (Pmode, 13);
9756 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9758 else
9760 tlsreg = gen_rtx_REG (Pmode, 2);
9761 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9763 emit_insn (insn);
9764 if (TARGET_64BIT)
9765 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9766 else
9767 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9768 emit_insn (insn);
9770 else
9772 rtx r3, got, tga, tmp1, tmp2, call_insn;
9774 /* We currently use relocations like @got@tlsgd for tls, which
9775 means the linker will handle allocation of tls entries, placing
9776 them in the .got section. So use a pointer to the .got section,
9777 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9778 or to secondary GOT sections used by 32-bit -fPIC. */
9779 if (TARGET_64BIT)
9780 got = gen_rtx_REG (Pmode, 2);
9781 else
9783 if (flag_pic == 1)
9784 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9785 else
9787 rtx gsym = rs6000_got_sym ();
9788 got = gen_reg_rtx (Pmode);
9789 if (flag_pic == 0)
9790 rs6000_emit_move (got, gsym, Pmode);
9791 else
9793 rtx mem, lab;
9795 tmp1 = gen_reg_rtx (Pmode);
9796 tmp2 = gen_reg_rtx (Pmode);
9797 mem = gen_const_mem (Pmode, tmp1);
9798 lab = gen_label_rtx ();
9799 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9800 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9801 if (TARGET_LINK_STACK)
9802 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9803 emit_move_insn (tmp2, mem);
9804 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9805 set_unique_reg_note (last, REG_EQUAL, gsym);
9810 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9812 tga = rs6000_tls_get_addr ();
9813 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9814 const0_rtx, Pmode);
9816 r3 = gen_rtx_REG (Pmode, 3);
9817 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9819 if (TARGET_64BIT)
9820 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9821 else
9822 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9824 else if (DEFAULT_ABI == ABI_V4)
9825 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9826 else
9827 gcc_unreachable ();
9828 call_insn = last_call_insn ();
9829 PATTERN (call_insn) = insn;
9830 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9831 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9832 pic_offset_table_rtx);
9834 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9836 tga = rs6000_tls_get_addr ();
9837 tmp1 = gen_reg_rtx (Pmode);
9838 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9839 const0_rtx, Pmode);
9841 r3 = gen_rtx_REG (Pmode, 3);
9842 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9844 if (TARGET_64BIT)
9845 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9846 else
9847 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9849 else if (DEFAULT_ABI == ABI_V4)
9850 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9851 else
9852 gcc_unreachable ();
9853 call_insn = last_call_insn ();
9854 PATTERN (call_insn) = insn;
9855 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9856 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9857 pic_offset_table_rtx);
9859 if (rs6000_tls_size == 16)
9861 if (TARGET_64BIT)
9862 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9863 else
9864 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9866 else if (rs6000_tls_size == 32)
9868 tmp2 = gen_reg_rtx (Pmode);
9869 if (TARGET_64BIT)
9870 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9871 else
9872 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9873 emit_insn (insn);
9874 if (TARGET_64BIT)
9875 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9876 else
9877 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9879 else
9881 tmp2 = gen_reg_rtx (Pmode);
9882 if (TARGET_64BIT)
9883 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9884 else
9885 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9886 emit_insn (insn);
9887 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9889 emit_insn (insn);
9891 else
9893 /* IE, or 64-bit offset LE. */
9894 tmp2 = gen_reg_rtx (Pmode);
9895 if (TARGET_64BIT)
9896 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9897 else
9898 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9899 emit_insn (insn);
9900 if (TARGET_64BIT)
9901 insn = gen_tls_tls_64 (dest, tmp2, addr);
9902 else
9903 insn = gen_tls_tls_32 (dest, tmp2, addr);
9904 emit_insn (insn);
9908 return dest;
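/* For illustration, a sketch of the sequences the code above expands
   to (exact relocations vary with ABI, code model and -mtls-size):
   64-bit local-exec with 16-bit offsets is a single
   addi dest,r13,sym@tprel
   while 64-bit global-dynamic becomes the usual call sequence
   addis r3,r2,sym@got@tlsgd@ha
   addi r3,r3,sym@got@tlsgd@l
   bl __tls_get_addr(sym@tlsgd)  */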
9911 /* Only create the global variable for the stack protect guard if we are using
9912 the global flavor of that guard. */
9913 static tree
9914 rs6000_init_stack_protect_guard (void)
9916 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9917 return default_stack_protect_guard ();
9919 return NULL_TREE;
9922 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9924 static bool
9925 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9927 if (GET_CODE (x) == HIGH
9928 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9929 return true;
9931 /* A TLS symbol in the TOC cannot contain a sum. */
9932 if (GET_CODE (x) == CONST
9933 && GET_CODE (XEXP (x, 0)) == PLUS
9934 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9935 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9936 return true;
9938 /* Do not place an ELF TLS symbol in the constant pool. */
9939 return TARGET_ELF && tls_referenced_p (x);
9942 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9943 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9944 can be addressed relative to the toc pointer. */
9946 static bool
9947 use_toc_relative_ref (rtx sym, machine_mode mode)
9949 return ((constant_pool_expr_p (sym)
9950 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9951 get_pool_mode (sym)))
9952 || (TARGET_CMODEL == CMODEL_MEDIUM
9953 && SYMBOL_REF_LOCAL_P (sym)
9954 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9957 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9958 replace the input X, or the original X if no replacement is called for.
9959 The output parameter *WIN is 1 if the calling macro should goto WIN,
9960 0 if it should not.
9962 For RS/6000, we wish to handle large displacements off a base
9963 register by splitting the addend across an addis and the mem insn.
9964 This cuts the number of extra insns needed from 3 to 1.
9966 On Darwin, we use this to generate code for floating point constants.
9967 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9968 The Darwin code is inside #if TARGET_MACHO because only then are the
9969 machopic_* functions defined. */
9970 static rtx
9971 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9972 int opnum, int type,
9973 int ind_levels ATTRIBUTE_UNUSED, int *win)
9975 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9976 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9978 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9979 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9980 if (reg_offset_p
9981 && opnum == 1
9982 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9983 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9984 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9985 && TARGET_P9_VECTOR)
9986 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9987 && TARGET_P9_VECTOR)))
9988 reg_offset_p = false;
9990 /* We must recognize output that we have already generated ourselves. */
9991 if (GET_CODE (x) == PLUS
9992 && GET_CODE (XEXP (x, 0)) == PLUS
9993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9994 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9995 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9997 if (TARGET_DEBUG_ADDR)
9999 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
10000 debug_rtx (x);
10002 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10003 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10004 opnum, (enum reload_type) type);
10005 *win = 1;
10006 return x;
10009 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
10010 if (GET_CODE (x) == LO_SUM
10011 && GET_CODE (XEXP (x, 0)) == HIGH)
10013 if (TARGET_DEBUG_ADDR)
10015 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
10016 debug_rtx (x);
10018 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10019 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10020 opnum, (enum reload_type) type);
10021 *win = 1;
10022 return x;
10025 #if TARGET_MACHO
10026 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
10027 && GET_CODE (x) == LO_SUM
10028 && GET_CODE (XEXP (x, 0)) == PLUS
10029 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
10030 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
10031 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
10032 && machopic_operand_p (XEXP (x, 1)))
10034 /* Result of previous invocation of this function on Darwin
10035 floating point constant. */
10036 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10037 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10038 opnum, (enum reload_type) type);
10039 *win = 1;
10040 return x;
10042 #endif
10044 if (TARGET_CMODEL != CMODEL_SMALL
10045 && reg_offset_p
10046 && !quad_offset_p
10047 && small_toc_ref (x, VOIDmode))
10049 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
10050 x = gen_rtx_LO_SUM (Pmode, hi, x);
10051 if (TARGET_DEBUG_ADDR)
10053 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
10054 debug_rtx (x);
10056 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10057 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10058 opnum, (enum reload_type) type);
10059 *win = 1;
10060 return x;
10063 if (GET_CODE (x) == PLUS
10064 && REG_P (XEXP (x, 0))
10065 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
10066 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
10067 && CONST_INT_P (XEXP (x, 1))
10068 && reg_offset_p
10069 && !SPE_VECTOR_MODE (mode)
10070 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10071 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
10073 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
10074 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
10075 HOST_WIDE_INT high
10076 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
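/* E.g. val = 0x12345 splits as high = 0x10000, low = 0x2345, and
   val = 0x1ffff splits as high = 0x20000, low = -1; in both cases
   high + low == val and low fits a signed 16-bit displacement.  */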
10078 /* Check for 32-bit overflow or quad addresses with one of the
10079 four least significant bits set. */
10080 if (high + low != val
10081 || (quad_offset_p && (low & 0xf)))
10083 *win = 0;
10084 return x;
10087 /* Reload the high part into a base reg; leave the low part
10088 in the mem directly. */
10090 x = gen_rtx_PLUS (GET_MODE (x),
10091 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
10092 GEN_INT (high)),
10093 GEN_INT (low));
10095 if (TARGET_DEBUG_ADDR)
10097 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
10098 debug_rtx (x);
10100 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10101 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10102 opnum, (enum reload_type) type);
10103 *win = 1;
10104 return x;
10107 if (GET_CODE (x) == SYMBOL_REF
10108 && reg_offset_p
10109 && !quad_offset_p
10110 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
10111 && !SPE_VECTOR_MODE (mode)
10112 #if TARGET_MACHO
10113 && DEFAULT_ABI == ABI_DARWIN
10114 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
10115 && machopic_symbol_defined_p (x)
10116 #else
10117 && DEFAULT_ABI == ABI_V4
10118 && !flag_pic
10119 #endif
10120 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10121 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10122 without fprs.
10123 ??? Assume floating point reg based on mode? This assumption is
10124 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10125 where reload ends up doing a DFmode load of a constant from
10126 mem using two gprs. Unfortunately, at this point reload
10127 hasn't yet selected regs so poking around in reload data
10128 won't help and even if we could figure out the regs reliably,
10129 we'd still want to allow this transformation when the mem is
10130 naturally aligned. Since we say the address is good here, we
10131 can't disable offsets from LO_SUMs in mem_operand_gpr.
10132 FIXME: Allow offset from lo_sum for other modes too, when
10133 mem is sufficiently aligned.
10135 Also disallow this if the type can go in VMX/Altivec registers, since
10136 those registers do not have d-form (reg+offset) address modes. */
10137 && !reg_addr[mode].scalar_in_vmx_p
10138 && mode != TFmode
10139 && mode != TDmode
10140 && mode != IFmode
10141 && mode != KFmode
10142 && (mode != TImode || !TARGET_VSX_TIMODE)
10143 && mode != PTImode
10144 && (mode != DImode || TARGET_POWERPC64)
10145 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
10146 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
10148 #if TARGET_MACHO
10149 if (flag_pic)
10151 rtx offset = machopic_gen_offset (x);
10152 x = gen_rtx_LO_SUM (GET_MODE (x),
10153 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10154 gen_rtx_HIGH (Pmode, offset)), offset);
10156 else
10157 #endif
10158 x = gen_rtx_LO_SUM (GET_MODE (x),
10159 gen_rtx_HIGH (Pmode, x), x);
10161 if (TARGET_DEBUG_ADDR)
10163 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
10164 debug_rtx (x);
10166 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10167 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10168 opnum, (enum reload_type) type);
10169 *win = 1;
10170 return x;
10173 /* Reload an offset address wrapped by an AND that represents the
10174 masking of the lower bits. Strip the outer AND and let reload
10175 convert the offset address into an indirect address. For VSX,
10176 force reload to create the address with an AND in a separate
10177 register, because we can't guarantee an altivec register will
10178 be used. */
10179 if (VECTOR_MEM_ALTIVEC_P (mode)
10180 && GET_CODE (x) == AND
10181 && GET_CODE (XEXP (x, 0)) == PLUS
10182 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
10183 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
10184 && GET_CODE (XEXP (x, 1)) == CONST_INT
10185 && INTVAL (XEXP (x, 1)) == -16)
10187 x = XEXP (x, 0);
10188 *win = 1;
10189 return x;
10192 if (TARGET_TOC
10193 && reg_offset_p
10194 && !quad_offset_p
10195 && GET_CODE (x) == SYMBOL_REF
10196 && use_toc_relative_ref (x, mode))
10198 x = create_TOC_reference (x, NULL_RTX);
10199 if (TARGET_CMODEL != CMODEL_SMALL)
10201 if (TARGET_DEBUG_ADDR)
10203 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
10204 debug_rtx (x);
10206 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10207 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10208 opnum, (enum reload_type) type);
10210 *win = 1;
10211 return x;
10213 *win = 0;
10214 return x;
10217 /* Debug version of rs6000_legitimize_reload_address. */
10218 static rtx
10219 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
10220 int opnum, int type,
10221 int ind_levels, int *win)
10223 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
10224 ind_levels, win);
10225 fprintf (stderr,
10226 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10227 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10228 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
10229 debug_rtx (x);
10231 if (x == ret)
10232 fprintf (stderr, "Same address returned\n");
10233 else if (!ret)
10234 fprintf (stderr, "NULL returned\n");
10235 else
10237 fprintf (stderr, "New address:\n");
10238 debug_rtx (ret);
10241 return ret;
10244 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10245 that is a valid memory address for an instruction.
10246 The MODE argument is the machine mode for the MEM expression
10247 that wants to use this address.
10249 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
10250 refers to a constant pool entry holding an address (or the sum of it
10251 plus a constant), a short (16-bit signed) constant plus a register,
10252 the sum of two registers, or a register indirect, possibly with an
10253 auto-increment. For DFmode, DDmode and DImode with a constant plus
10254 register, we must ensure that both words are addressable or PowerPC64
10255 with offset word aligned.
10257 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10258 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10259 because adjacent memory cells are accessed by adding word-sized offsets
10260 during assembly output. */
10261 static bool
10262 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10264 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10265 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10267 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10268 if (VECTOR_MEM_ALTIVEC_P (mode)
10269 && GET_CODE (x) == AND
10270 && GET_CODE (XEXP (x, 1)) == CONST_INT
10271 && INTVAL (XEXP (x, 1)) == -16)
10272 x = XEXP (x, 0);
10274 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10275 return 0;
10276 if (legitimate_indirect_address_p (x, reg_ok_strict))
10277 return 1;
10278 if (TARGET_UPDATE
10279 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10280 && mode_supports_pre_incdec_p (mode)
10281 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10282 return 1;
10283 /* Handle restricted vector d-form offsets in ISA 3.0. */
10284 if (quad_offset_p)
10286 if (quad_address_p (x, mode, reg_ok_strict))
10287 return 1;
10289 else if (virtual_stack_registers_memory_p (x))
10290 return 1;
10292 else if (reg_offset_p)
10294 if (legitimate_small_data_p (mode, x))
10295 return 1;
10296 if (legitimate_constant_pool_address_p (x, mode,
10297 reg_ok_strict || lra_in_progress))
10298 return 1;
10299 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10300 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10301 return 1;
10304 /* For TImode, if we have TImode in VSX registers, only allow register
10305 indirect addresses. This will allow the values to go in either GPRs
10306 or VSX registers without reloading. The vector types would tend to
10307 go into VSX registers, so we allow REG+REG, while TImode seems
10308 somewhat split, in that some uses are GPR based, and some VSX based. */
10309 /* FIXME: We could loosen this by changing the following to
10310 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10311 but currently we cannot allow REG+REG addressing for TImode. See
10312 PR72827 for complete details on how this ends up hoodwinking DSE. */
10313 if (mode == TImode && TARGET_VSX_TIMODE)
10314 return 0;
10315 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10316 if (! reg_ok_strict
10317 && reg_offset_p
10318 && GET_CODE (x) == PLUS
10319 && GET_CODE (XEXP (x, 0)) == REG
10320 && (XEXP (x, 0) == virtual_stack_vars_rtx
10321 || XEXP (x, 0) == arg_pointer_rtx)
10322 && GET_CODE (XEXP (x, 1)) == CONST_INT)
10323 return 1;
10324 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10325 return 1;
10326 if (!FLOAT128_2REG_P (mode)
10327 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10328 || TARGET_POWERPC64
10329 || (mode != DFmode && mode != DDmode)
10330 || (TARGET_E500_DOUBLE && mode != DDmode))
10331 && (TARGET_POWERPC64 || mode != DImode)
10332 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10333 && mode != PTImode
10334 && !avoiding_indexed_address_p (mode)
10335 && legitimate_indexed_address_p (x, reg_ok_strict))
10336 return 1;
10337 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10338 && mode_supports_pre_modify_p (mode)
10339 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10340 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10341 reg_ok_strict, false)
10342 || (!avoiding_indexed_address_p (mode)
10343 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10344 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10345 return 1;
10346 if (reg_offset_p && !quad_offset_p
10347 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10348 return 1;
10349 return 0;
10352 /* Debug version of rs6000_legitimate_address_p. */
10353 static bool
10354 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10355 bool reg_ok_strict)
10357 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10358 fprintf (stderr,
10359 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10360 "strict = %d, reload = %s, code = %s\n",
10361 ret ? "true" : "false",
10362 GET_MODE_NAME (mode),
10363 reg_ok_strict,
10364 (reload_completed
10365 ? "after"
10366 : (reload_in_progress ? "progress" : "before")),
10367 GET_RTX_NAME (GET_CODE (x)));
10368 debug_rtx (x);
10370 return ret;
10373 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10375 static bool
10376 rs6000_mode_dependent_address_p (const_rtx addr,
10377 addr_space_t as ATTRIBUTE_UNUSED)
10379 return rs6000_mode_dependent_address_ptr (addr);
10382 /* Go to LABEL if ADDR (a legitimate address expression)
10383 has an effect that depends on the machine mode it is used for.
10385 On the RS/6000 this is true of all integral offsets (since AltiVec
10386 and VSX modes don't allow them) and of pre-increment or decrement addresses.
10388 ??? Except that due to conceptual problems in offsettable_address_p
10389 we can't really report the problems of integral offsets. So leave
10390 this assuming that the adjustable offset must be valid for the
10391 sub-words of a TFmode operand, which is what we had before. */
10393 static bool
10394 rs6000_mode_dependent_address (const_rtx addr)
10396 switch (GET_CODE (addr))
10398 case PLUS:
10399 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10400 is considered a legitimate address before reload, so there
10401 are no offset restrictions in that case. Note that this
10402 condition is safe in strict mode because any address involving
10403 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10404 been rejected as illegitimate. */
10405 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10406 && XEXP (addr, 0) != arg_pointer_rtx
10407 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10409 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
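/* A mode-independent offset must leave room for the word-sized
   sub-accesses described above: e.g. in 32-bit mode 0x7ff0 is fine,
   but 0x7ff8 is not, since a TFmode access may need displacements
   up to offset + 12.  */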
10410 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10412 break;
10414 case LO_SUM:
10415 /* Anything in the constant pool is sufficiently aligned that
10416 all bytes have the same high part address. */
10417 return !legitimate_constant_pool_address_p (addr, QImode, false);
10419 /* Auto-increment cases are now treated generically in recog.c. */
10420 case PRE_MODIFY:
10421 return TARGET_UPDATE;
10423 /* AND is only allowed in Altivec loads. */
10424 case AND:
10425 return true;
10427 default:
10428 break;
10431 return false;
10434 /* Debug version of rs6000_mode_dependent_address. */
10435 static bool
10436 rs6000_debug_mode_dependent_address (const_rtx addr)
10438 bool ret = rs6000_mode_dependent_address (addr);
10440 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10441 ret ? "true" : "false");
10442 debug_rtx (addr);
10444 return ret;
10447 /* Implement FIND_BASE_TERM. */
10449 rtx
10450 rs6000_find_base_term (rtx op)
10452 rtx base;
10454 base = op;
10455 if (GET_CODE (base) == CONST)
10456 base = XEXP (base, 0);
10457 if (GET_CODE (base) == PLUS)
10458 base = XEXP (base, 0);
10459 if (GET_CODE (base) == UNSPEC)
10460 switch (XINT (base, 1))
10462 case UNSPEC_TOCREL:
10463 case UNSPEC_MACHOPIC_OFFSET:
10464 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10465 for aliasing purposes. */
10466 return XVECEXP (base, 0, 0);
10469 return op;
10472 /* More elaborate version of recog's offsettable_memref_p predicate
10473 that works around the ??? note of rs6000_mode_dependent_address.
10474 In particular it accepts
10476 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10478 in 32-bit mode, which the recog predicate rejects. */
10480 static bool
10481 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10483 bool worst_case;
10485 if (!MEM_P (op))
10486 return false;
10488 /* First mimic offsettable_memref_p. */
10489 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10490 return true;
10492 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10493 the latter predicate knows nothing about the mode of the memory
10494 reference and, therefore, assumes that it is the largest supported
10495 mode (TFmode). As a consequence, legitimate offsettable memory
10496 references are rejected. rs6000_legitimate_offset_address_p contains
10497 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10498 at least with a little bit of help here given that we know the
10499 actual registers used. */
10500 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10501 || GET_MODE_SIZE (reg_mode) == 4);
10502 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10503 true, worst_case);
10506 /* Determine the reassociation width to be used in reassociate_bb.
10507 This takes into account how many parallel operations we
10508 can actually do of a given type, and also the latency.
10510 int add/sub 6/cycle
10511 mul 2/cycle
10512 vect add/sub/mul 2/cycle
10513 fp add/sub/mul 2/cycle
10514 dfp 1/cycle
10515 */
10517 static int
10518 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10519 machine_mode mode)
10521 switch (rs6000_cpu)
10523 case PROCESSOR_POWER8:
10524 case PROCESSOR_POWER9:
10525 if (DECIMAL_FLOAT_MODE_P (mode))
10526 return 1;
10527 if (VECTOR_MODE_P (mode))
10528 return 4;
10529 if (INTEGRAL_MODE_P (mode))
10530 return opc == MULT_EXPR ? 4 : 6;
10531 if (FLOAT_MODE_P (mode))
10532 return 4;
10533 break;
10534 default:
10535 break;
10537 return 1;
10540 /* Change register usage conditional on target flags. */
10541 static void
10542 rs6000_conditional_register_usage (void)
10544 int i;
10546 if (TARGET_DEBUG_TARGET)
10547 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10549 /* Set MQ register fixed (already call_used) so that it will not be
10550 allocated. */
10551 fixed_regs[64] = 1;
10553 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10554 if (TARGET_64BIT)
10555 fixed_regs[13] = call_used_regs[13]
10556 = call_really_used_regs[13] = 1;
10558 /* Conditionally disable FPRs. */
10559 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10560 for (i = 32; i < 64; i++)
10561 fixed_regs[i] = call_used_regs[i]
10562 = call_really_used_regs[i] = 1;
10564 /* The TOC register is not killed across calls in a way that is
10565 visible to the compiler. */
10566 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10567 call_really_used_regs[2] = 0;
10569 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10570 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10572 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10573 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10574 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10575 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10577 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10578 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10579 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10580 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10582 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10583 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10584 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10586 if (TARGET_SPE)
10588 global_regs[SPEFSCR_REGNO] = 1;
10589 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10590 registers in prologues and epilogues. We no longer use r14
10591 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10592 pool for link-compatibility with older versions of GCC. Once
10593 "old" code has died out, we can return r14 to the allocation
10594 pool. */
10595 fixed_regs[14]
10596 = call_used_regs[14]
10597 = call_really_used_regs[14] = 1;
10600 if (!TARGET_ALTIVEC && !TARGET_VSX)
10602 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10603 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10604 call_really_used_regs[VRSAVE_REGNO] = 1;
10607 if (TARGET_ALTIVEC || TARGET_VSX)
10608 global_regs[VSCR_REGNO] = 1;
10610 if (TARGET_ALTIVEC_ABI)
10612 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10613 call_used_regs[i] = call_really_used_regs[i] = 1;
10615 /* AIX reserves VR20:31 in non-extended ABI mode. */
10616 if (TARGET_XCOFF)
10617 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10618 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10623 /* Output insns to set DEST equal to the constant SOURCE as a series of
10624 lis, ori and shl instructions and return TRUE. */
10626 bool
10627 rs6000_emit_set_const (rtx dest, rtx source)
10629 machine_mode mode = GET_MODE (dest);
10630 rtx temp, set;
10631 rtx_insn *insn;
10632 HOST_WIDE_INT c;
10634 gcc_checking_assert (CONST_INT_P (source));
10635 c = INTVAL (source);
10636 switch (mode)
10638 case E_QImode:
10639 case E_HImode:
10640 emit_insn (gen_rtx_SET (dest, source));
10641 return true;
10643 case E_SImode:
10644 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10646 emit_insn (gen_rtx_SET (copy_rtx (temp),
10647 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10648 emit_insn (gen_rtx_SET (dest,
10649 gen_rtx_IOR (SImode, copy_rtx (temp),
10650 GEN_INT (c & 0xffff))));
10651 break;
10653 case E_DImode:
10654 if (!TARGET_POWERPC64)
10656 rtx hi, lo;
10658 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10659 DImode);
10660 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10661 DImode);
10662 emit_move_insn (hi, GEN_INT (c >> 32));
10663 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10664 emit_move_insn (lo, GEN_INT (c));
10666 else
10667 rs6000_emit_set_long_const (dest, c);
10668 break;
10670 default:
10671 gcc_unreachable ();
10674 insn = get_last_insn ();
10675 set = single_set (insn);
10676 if (! CONSTANT_P (SET_SRC (set)))
10677 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10679 return true;
10682 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10683 Output insns to set DEST equal to the constant C as a series of
10684 lis, ori and shl instructions. */
10686 static void
10687 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10689 rtx temp;
10690 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10692 ud1 = c & 0xffff;
10693 c = c >> 16;
10694 ud2 = c & 0xffff;
10695 c = c >> 16;
10696 ud3 = c & 0xffff;
10697 c = c >> 16;
10698 ud4 = c & 0xffff;
10700 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10701 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10702 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10704 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10705 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10707 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10709 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10710 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10711 if (ud1 != 0)
10712 emit_move_insn (dest,
10713 gen_rtx_IOR (DImode, copy_rtx (temp),
10714 GEN_INT (ud1)));
10716 else if (ud3 == 0 && ud4 == 0)
10718 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10720 gcc_assert (ud2 & 0x8000);
10721 emit_move_insn (copy_rtx (temp),
10722 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10723 if (ud1 != 0)
10724 emit_move_insn (copy_rtx (temp),
10725 gen_rtx_IOR (DImode, copy_rtx (temp),
10726 GEN_INT (ud1)));
10727 emit_move_insn (dest,
10728 gen_rtx_ZERO_EXTEND (DImode,
10729 gen_lowpart (SImode,
10730 copy_rtx (temp))));
10732 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10733 || (ud4 == 0 && ! (ud3 & 0x8000)))
10735 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10737 emit_move_insn (copy_rtx (temp),
10738 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10739 if (ud2 != 0)
10740 emit_move_insn (copy_rtx (temp),
10741 gen_rtx_IOR (DImode, copy_rtx (temp),
10742 GEN_INT (ud2)));
10743 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10744 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10745 GEN_INT (16)));
10746 if (ud1 != 0)
10747 emit_move_insn (dest,
10748 gen_rtx_IOR (DImode, copy_rtx (temp),
10749 GEN_INT (ud1)));
10751 else
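/* A sketch of this general case: the constant 0x123456789abcdef0
   is built as
   lis tmp,0x1234 (ud4)
   ori tmp,tmp,0x5678 (ud3)
   sldi tmp,tmp,32
   oris tmp,tmp,0x9abc (ud2)
   ori dest,tmp,0xdef0 (ud1)  */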
10753 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10755 emit_move_insn (copy_rtx (temp),
10756 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10757 if (ud3 != 0)
10758 emit_move_insn (copy_rtx (temp),
10759 gen_rtx_IOR (DImode, copy_rtx (temp),
10760 GEN_INT (ud3)));
10762 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10763 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10764 GEN_INT (32)));
10765 if (ud2 != 0)
10766 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10767 gen_rtx_IOR (DImode, copy_rtx (temp),
10768 GEN_INT (ud2 << 16)));
10769 if (ud1 != 0)
10770 emit_move_insn (dest,
10771 gen_rtx_IOR (DImode, copy_rtx (temp),
10772 GEN_INT (ud1)));
10776 /* Helper for the following. Get rid of [r+r] memory refs
10777 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
10779 static void
10780 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10782 if (reload_in_progress)
10783 return;
10785 if (GET_CODE (operands[0]) == MEM
10786 && GET_CODE (XEXP (operands[0], 0)) != REG
10787 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10788 GET_MODE (operands[0]), false))
10789 operands[0]
10790 = replace_equiv_address (operands[0],
10791 copy_addr_to_reg (XEXP (operands[0], 0)));
10793 if (GET_CODE (operands[1]) == MEM
10794 && GET_CODE (XEXP (operands[1], 0)) != REG
10795 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10796 GET_MODE (operands[1]), false))
10797 operands[1]
10798 = replace_equiv_address (operands[1],
10799 copy_addr_to_reg (XEXP (operands[1], 0)));
10802 /* Generate a vector of constants to permute MODE for a little-endian
10803 storage operation by swapping the two halves of a vector. */
10804 static rtvec
10805 rs6000_const_vec (machine_mode mode)
10807 int i, subparts;
10808 rtvec v;
10810 switch (mode)
10812 case E_V1TImode:
10813 subparts = 1;
10814 break;
10815 case E_V2DFmode:
10816 case E_V2DImode:
10817 subparts = 2;
10818 break;
10819 case E_V4SFmode:
10820 case E_V4SImode:
10821 subparts = 4;
10822 break;
10823 case E_V8HImode:
10824 subparts = 8;
10825 break;
10826 case E_V16QImode:
10827 subparts = 16;
10828 break;
10829 default:
10830 gcc_unreachable();
10833 v = rtvec_alloc (subparts);
10835 for (i = 0; i < subparts / 2; ++i)
10836 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10837 for (i = subparts / 2; i < subparts; ++i)
10838 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
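/* E.g. for V4SImode this yields the permutation {2, 3, 0, 1},
   swapping the two 64-bit halves of the vector.  */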
10840 return v;
10843 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10844 for a VSX load or store operation. */
10845 rtx
10846 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10848 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10849 128-bit integers if they are allowed in VSX registers. */
10850 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10851 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10852 else
10854 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10855 return gen_rtx_VEC_SELECT (mode, source, par);
10859 /* Emit a little-endian load from vector memory location SOURCE to VSX
10860 register DEST in mode MODE. The load is done with two permuting
10861 insns that represent an lxvd2x and an xxpermdi. */
10862 void
10863 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10865 rtx tmp, permute_mem, permute_reg;
10867 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10868 V1TImode). */
10869 if (mode == TImode || mode == V1TImode)
10871 mode = V2DImode;
10872 dest = gen_lowpart (V2DImode, dest);
10873 source = adjust_address (source, V2DImode, 0);
10876 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10877 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10878 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10879 emit_insn (gen_rtx_SET (tmp, permute_mem));
10880 emit_insn (gen_rtx_SET (dest, permute_reg));
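/* The two SETs above match the lxvd2x and xxpermdi patterns: the
   load brings the doublewords in swapped, and the register permute
   swaps them back.  Emitting the pair explicitly at expand time
   lets a later pass cancel such a permute against an adjacent
   store's.  */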
10883 /* Emit a little-endian store to vector memory location DEST from VSX
10884 register SOURCE in mode MODE. The store is done with two permuting
10885 insns that represent an xxpermdi and an stxvd2x. */
10886 void
10887 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10889 rtx tmp, permute_src, permute_tmp;
10891 /* This should never be called during or after reload, because it does
10892 not re-permute the source register. It is intended only for use
10893 during expand. */
10894 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10896 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10897 V1TImode). */
10898 if (mode == TImode || mode == V1TImode)
10900 mode = V2DImode;
10901 dest = adjust_address (dest, V2DImode, 0);
10902 source = gen_lowpart (V2DImode, source);
10905 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10906 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10907 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10908 emit_insn (gen_rtx_SET (tmp, permute_src));
10909 emit_insn (gen_rtx_SET (dest, permute_tmp));
10912 /* Emit a sequence representing a little-endian VSX load or store,
10913 moving data from SOURCE to DEST in mode MODE. This is done
10914 separately from rs6000_emit_move to ensure it is called only
10915 during expand. LE VSX loads and stores introduced later are
10916 handled with a split. The expand-time RTL generation allows
10917 us to optimize away redundant pairs of register-permutes. */
10918 void
10919 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10921 gcc_assert (!BYTES_BIG_ENDIAN
10922 && VECTOR_MEM_VSX_P (mode)
10923 && !TARGET_P9_VECTOR
10924 && !gpr_or_gpr_p (dest, source)
10925 && (MEM_P (source) ^ MEM_P (dest)));
10927 if (MEM_P (source))
10929 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10930 rs6000_emit_le_vsx_load (dest, source, mode);
10932 else
10934 if (!REG_P (source))
10935 source = force_reg (mode, source);
10936 rs6000_emit_le_vsx_store (dest, source, mode);
10940 /* Return whether an SFmode or SImode move can be done without converting one
10941 mode to another. This arises when we have:
10943 (SUBREG:SF (REG:SI ...))
10944 (SUBREG:SI (REG:SF ...))
10946 and one of the values is in a floating point/vector register, where SFmode
10947 scalars are stored in DFmode format. */
10949 bool
10950 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10952 if (TARGET_ALLOW_SF_SUBREG)
10953 return true;
10955 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10956 return true;
10958 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10959 return true;
10961 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10962 if (SUBREG_P (dest))
10964 rtx dest_subreg = SUBREG_REG (dest);
10965 rtx src_subreg = SUBREG_REG (src);
10966 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10969 return false;
10973 /* Helper function to change moves with:
10975 (SUBREG:SF (REG:SI)) and
10976 (SUBREG:SI (REG:SF))
10978 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10979 values are stored as DFmode values in the VSX registers. We need to convert
10980 the bits before we can use a direct move or operate on the bits in the
10981 vector register as an integer type.
10983 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10985 static bool
10986 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10988 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10989 && !lra_in_progress
10990 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10991 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10993 rtx inner_source = SUBREG_REG (source);
10994 machine_mode inner_mode = GET_MODE (inner_source);
10996 if (mode == SImode && inner_mode == SFmode)
10998 emit_insn (gen_movsi_from_sf (dest, inner_source));
10999 return true;
11002 if (mode == SFmode && inner_mode == SImode)
11004 emit_insn (gen_movsf_from_si (dest, inner_source));
11005 return true;
11009 return false;
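/* Example of the rewrite performed above (illustrative): a move such as
       (set (reg:SI 3) (subreg:SI (reg:SF 33) 0))
   is emitted as the movsi_from_sf pattern instead, which converts the
   DFmode-format value in the VSX register to its 32-bit SFmode image
   before the direct move, rather than relying on subreg semantics.  */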
11012 /* Emit a move from SOURCE to DEST in mode MODE. */
11013 void
11014 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
11016 rtx operands[2];
11017 operands[0] = dest;
11018 operands[1] = source;
11020 if (TARGET_DEBUG_ADDR)
11022 fprintf (stderr,
11023 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
11024 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
11025 GET_MODE_NAME (mode),
11026 reload_in_progress,
11027 reload_completed,
11028 can_create_pseudo_p ());
11029 debug_rtx (dest);
11030 fprintf (stderr, "source:\n");
11031 debug_rtx (source);
11034 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
11035 if (CONST_WIDE_INT_P (operands[1])
11036 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
11038 /* This should be fixed with the introduction of CONST_WIDE_INT. */
11039 gcc_unreachable ();
11042 /* See if we need to special case SImode/SFmode SUBREG moves. */
11043 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
11044 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
11045 return;
11047 /* Check if GCC is setting up a block move that will end up using FP
11048 registers as temporaries. We must make sure this is acceptable. */
11049 if (GET_CODE (operands[0]) == MEM
11050 && GET_CODE (operands[1]) == MEM
11051 && mode == DImode
11052 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
11053 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
11054 && ! (rs6000_slow_unaligned_access (SImode,
11055 (MEM_ALIGN (operands[0]) > 32
11056 ? 32 : MEM_ALIGN (operands[0])))
11057 || rs6000_slow_unaligned_access (SImode,
11058 (MEM_ALIGN (operands[1]) > 32
11059 ? 32 : MEM_ALIGN (operands[1]))))
11060 && ! MEM_VOLATILE_P (operands [0])
11061 && ! MEM_VOLATILE_P (operands [1]))
11063 emit_move_insn (adjust_address (operands[0], SImode, 0),
11064 adjust_address (operands[1], SImode, 0));
11065 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
11066 adjust_address (copy_rtx (operands[1]), SImode, 4));
11067 return;
11070 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
11071 && !gpc_reg_operand (operands[1], mode))
11072 operands[1] = force_reg (mode, operands[1]);
11074 /* Recognize the case where operand[1] is a reference to thread-local
11075 data and load its address to a register. */
11076 if (tls_referenced_p (operands[1]))
11078 enum tls_model model;
11079 rtx tmp = operands[1];
11080 rtx addend = NULL;
11082 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11084 addend = XEXP (XEXP (tmp, 0), 1);
11085 tmp = XEXP (XEXP (tmp, 0), 0);
11088 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
11089 model = SYMBOL_REF_TLS_MODEL (tmp);
11090 gcc_assert (model != 0);
11092 tmp = rs6000_legitimize_tls_address (tmp, model);
11093 if (addend)
11095 tmp = gen_rtx_PLUS (mode, tmp, addend);
11096 tmp = force_operand (tmp, operands[0]);
11098 operands[1] = tmp;
11101 /* Handle the case where reload calls us with an invalid address. */
11102 if (reload_in_progress && mode == Pmode
11103 && (! general_operand (operands[1], mode)
11104 || ! nonimmediate_operand (operands[0], mode)))
11105 goto emit_set;
11107 /* 128-bit constant floating-point values on Darwin should really be loaded
11108 as two parts. However, this premature splitting is a problem when DFmode
11109 values can go into Altivec registers. */
11110 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
11111 && GET_CODE (operands[1]) == CONST_DOUBLE)
11113 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11114 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11115 DFmode);
11116 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11117 GET_MODE_SIZE (DFmode)),
11118 simplify_gen_subreg (DFmode, operands[1], mode,
11119 GET_MODE_SIZE (DFmode)),
11120 DFmode);
11121 return;
11124 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
11125 cfun->machine->sdmode_stack_slot =
11126 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
11129 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11130 p1:SD) if p1 is not of floating point class and p0 is spilled as
11131 we can have no analogous movsd_store for this. */
11132 if (lra_in_progress && mode == DDmode
11133 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11134 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11135 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
11136 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11138 enum reg_class cl;
11139 int regno = REGNO (SUBREG_REG (operands[1]));
11141 if (regno >= FIRST_PSEUDO_REGISTER)
11143 cl = reg_preferred_class (regno);
11144 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11146 if (regno >= 0 && ! FP_REGNO_P (regno))
11148 mode = SDmode;
11149 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11150 operands[1] = SUBREG_REG (operands[1]);
11153 if (lra_in_progress
11154 && mode == SDmode
11155 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11156 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11157 && (REG_P (operands[1])
11158 || (GET_CODE (operands[1]) == SUBREG
11159 && REG_P (SUBREG_REG (operands[1])))))
11161 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
11162 ? SUBREG_REG (operands[1]) : operands[1]);
11163 enum reg_class cl;
11165 if (regno >= FIRST_PSEUDO_REGISTER)
11167 cl = reg_preferred_class (regno);
11168 gcc_assert (cl != NO_REGS);
11169 regno = ira_class_hard_regs[cl][0];
11171 if (FP_REGNO_P (regno))
11173 if (GET_MODE (operands[0]) != DDmode)
11174 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11175 emit_insn (gen_movsd_store (operands[0], operands[1]));
11177 else if (INT_REGNO_P (regno))
11178 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11179 else
11180 gcc_unreachable();
11181 return;
11183 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11184 p:DD)) if p0 is not of floating point class and p1 is spilled as
11185 we can have no analogous movsd_load for this. */
11186 if (lra_in_progress && mode == DDmode
11187 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
11188 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11189 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11190 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11192 enum reg_class cl;
11193 int regno = REGNO (SUBREG_REG (operands[0]));
11195 if (regno >= FIRST_PSEUDO_REGISTER)
11197 cl = reg_preferred_class (regno);
11198 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11200 if (regno >= 0 && ! FP_REGNO_P (regno))
11202 mode = SDmode;
11203 operands[0] = SUBREG_REG (operands[0]);
11204 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11207 if (lra_in_progress
11208 && mode == SDmode
11209 && (REG_P (operands[0])
11210 || (GET_CODE (operands[0]) == SUBREG
11211 && REG_P (SUBREG_REG (operands[0]))))
11212 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11213 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11215 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
11216 ? SUBREG_REG (operands[0]) : operands[0]);
11217 enum reg_class cl;
11219 if (regno >= FIRST_PSEUDO_REGISTER)
11221 cl = reg_preferred_class (regno);
11222 gcc_assert (cl != NO_REGS);
11223 regno = ira_class_hard_regs[cl][0];
11225 if (FP_REGNO_P (regno))
11227 if (GET_MODE (operands[1]) != DDmode)
11228 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11229 emit_insn (gen_movsd_load (operands[0], operands[1]));
11231 else if (INT_REGNO_P (regno))
11232 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11233 else
11234 gcc_unreachable();
11235 return;
11238 if (reload_in_progress
11239 && mode == SDmode
11240 && cfun->machine->sdmode_stack_slot != NULL_RTX
11241 && MEM_P (operands[0])
11242 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
11243 && REG_P (operands[1]))
11245 if (FP_REGNO_P (REGNO (operands[1])))
11247 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
11248 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11249 emit_insn (gen_movsd_store (mem, operands[1]));
11251 else if (INT_REGNO_P (REGNO (operands[1])))
11253 rtx mem = operands[0];
11254 if (BYTES_BIG_ENDIAN)
11255 mem = adjust_address_nv (mem, mode, 4);
11256 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11257 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11259 else
11260 gcc_unreachable();
11261 return;
11263 if (reload_in_progress
11264 && mode == SDmode
11265 && REG_P (operands[0])
11266 && MEM_P (operands[1])
11267 && cfun->machine->sdmode_stack_slot != NULL_RTX
11268 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11270 if (FP_REGNO_P (REGNO (operands[0])))
11272 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11273 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11274 emit_insn (gen_movsd_load (operands[0], mem));
11276 else if (INT_REGNO_P (REGNO (operands[0])))
11278 rtx mem = operands[1];
11279 if (BYTES_BIG_ENDIAN)
11280 mem = adjust_address_nv (mem, mode, 4);
11281 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11282 emit_insn (gen_movsd_hardfloat (operands[0], mem));
11284 else
11285 gcc_unreachable();
11286 return;
11289 /* FIXME: In the long term, this switch statement should go away
11290 and be replaced by a sequence of tests based on things like
11291 mode == Pmode. */
11292 switch (mode)
11294 case E_HImode:
11295 case E_QImode:
11296 if (CONSTANT_P (operands[1])
11297 && GET_CODE (operands[1]) != CONST_INT)
11298 operands[1] = force_const_mem (mode, operands[1]);
11299 break;
11301 case E_TFmode:
11302 case E_TDmode:
11303 case E_IFmode:
11304 case E_KFmode:
11305 if (FLOAT128_2REG_P (mode))
11306 rs6000_eliminate_indexed_memrefs (operands);
11307 /* fall through */
11309 case E_DFmode:
11310 case E_DDmode:
11311 case E_SFmode:
11312 case E_SDmode:
11313 if (CONSTANT_P (operands[1])
11314 && ! easy_fp_constant (operands[1], mode))
11315 operands[1] = force_const_mem (mode, operands[1]);
11316 break;
11318 case E_V16QImode:
11319 case E_V8HImode:
11320 case E_V4SFmode:
11321 case E_V4SImode:
11322 case E_V4HImode:
11323 case E_V2SFmode:
11324 case E_V2SImode:
11325 case E_V1DImode:
11326 case E_V2DFmode:
11327 case E_V2DImode:
11328 case E_V1TImode:
11329 if (CONSTANT_P (operands[1])
11330 && !easy_vector_constant (operands[1], mode))
11331 operands[1] = force_const_mem (mode, operands[1]);
11332 break;
11334 case E_SImode:
11335 case E_DImode:
11336 /* Use default pattern for address of ELF small data. */
11337 if (TARGET_ELF
11338 && mode == Pmode
11339 && DEFAULT_ABI == ABI_V4
11340 && (GET_CODE (operands[1]) == SYMBOL_REF
11341 || GET_CODE (operands[1]) == CONST)
11342 && small_data_operand (operands[1], mode))
11344 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11345 return;
11348 if (DEFAULT_ABI == ABI_V4
11349 && mode == Pmode && mode == SImode
11350 && flag_pic == 1 && got_operand (operands[1], mode))
11352 emit_insn (gen_movsi_got (operands[0], operands[1]));
11353 return;
11356 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11357 && TARGET_NO_TOC
11358 && ! flag_pic
11359 && mode == Pmode
11360 && CONSTANT_P (operands[1])
11361 && GET_CODE (operands[1]) != HIGH
11362 && GET_CODE (operands[1]) != CONST_INT)
11364 rtx target = (!can_create_pseudo_p ()
11365 ? operands[0]
11366 : gen_reg_rtx (mode));
11368 /* If this is a function address on -mcall-aixdesc,
11369 convert it to the address of the descriptor. */
11370 if (DEFAULT_ABI == ABI_AIX
11371 && GET_CODE (operands[1]) == SYMBOL_REF
11372 && XSTR (operands[1], 0)[0] == '.')
11374 const char *name = XSTR (operands[1], 0);
11375 rtx new_ref;
11376 while (*name == '.')
11377 name++;
11378 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11379 CONSTANT_POOL_ADDRESS_P (new_ref)
11380 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11381 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11382 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11383 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11384 operands[1] = new_ref;
11387 if (DEFAULT_ABI == ABI_DARWIN)
11389 #if TARGET_MACHO
11390 if (MACHO_DYNAMIC_NO_PIC_P)
11392 /* Take care of any required data indirection. */
11393 operands[1] = rs6000_machopic_legitimize_pic_address (
11394 operands[1], mode, operands[0]);
11395 if (operands[0] != operands[1])
11396 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11397 return;
11399 #endif
11400 emit_insn (gen_macho_high (target, operands[1]));
11401 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11402 return;
11405 emit_insn (gen_elf_high (target, operands[1]));
11406 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11407 return;
11410 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11411 and we have put it in the TOC, we just need to make a TOC-relative
11412 reference to it. */
11413 if (TARGET_TOC
11414 && GET_CODE (operands[1]) == SYMBOL_REF
11415 && use_toc_relative_ref (operands[1], mode))
11416 operands[1] = create_TOC_reference (operands[1], operands[0]);
11417 else if (mode == Pmode
11418 && CONSTANT_P (operands[1])
11419 && GET_CODE (operands[1]) != HIGH
11420 && ((GET_CODE (operands[1]) != CONST_INT
11421 && ! easy_fp_constant (operands[1], mode))
11422 || (GET_CODE (operands[1]) == CONST_INT
11423 && (num_insns_constant (operands[1], mode)
11424 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11425 || (GET_CODE (operands[0]) == REG
11426 && FP_REGNO_P (REGNO (operands[0]))))
11427 && !toc_relative_expr_p (operands[1], false)
11428 && (TARGET_CMODEL == CMODEL_SMALL
11429 || can_create_pseudo_p ()
11430 || (REG_P (operands[0])
11431 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11434 #if TARGET_MACHO
11435 /* Darwin uses a special PIC legitimizer. */
11436 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11438 operands[1] =
11439 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11440 operands[0]);
11441 if (operands[0] != operands[1])
11442 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11443 return;
11445 #endif
11447 /* If we are to limit the number of things we put in the TOC and
11448 this is a symbol plus a constant we can add in one insn,
11449 just put the symbol in the TOC and add the constant. Don't do
11450 this if reload is in progress. */
11451 if (GET_CODE (operands[1]) == CONST
11452 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11453 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11454 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11455 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11456 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11457 && ! side_effects_p (operands[0]))
11459 rtx sym =
11460 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11461 rtx other = XEXP (XEXP (operands[1], 0), 1);
11463 sym = force_reg (mode, sym);
11464 emit_insn (gen_add3_insn (operands[0], sym, other));
11465 return;
11468 operands[1] = force_const_mem (mode, operands[1]);
11470 if (TARGET_TOC
11471 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11472 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11474 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11475 operands[0]);
11476 operands[1] = gen_const_mem (mode, tocref);
11477 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11480 break;
11482 case E_TImode:
11483 if (!VECTOR_MEM_VSX_P (TImode))
11484 rs6000_eliminate_indexed_memrefs (operands);
11485 break;
11487 case E_PTImode:
11488 rs6000_eliminate_indexed_memrefs (operands);
11489 break;
11491 default:
11492 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11495 /* Above, we may have called force_const_mem which may have returned
11496 an invalid address. If we can, fix this up; otherwise, reload will
11497 have to deal with it. */
11498 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11499 operands[1] = validize_mem (operands[1]);
11501 emit_set:
11502 emit_insn (gen_rtx_SET (operands[0], operands[1]));
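/* Usage sketch (illustrative, not part of this file): the mov<mode>
   expanders in the machine description are expected to funnel through
   this routine, along the lines of

       (define_expand "movsi"
         [(set (match_operand:SI 0 "nonimmediate_operand" "")
               (match_operand:SI 1 "any_operand" ""))]
         ""
         "{ rs6000_emit_move (operands[0], operands[1], SImode); DONE; }")

   so all of the legitimization above runs before any move insn is
   recognized.  */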
11505 /* Return true if a structure, union or array containing FIELD should be
11506 accessed using `BLKmode'.
11508 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11509 entire thing in a DI and use subregs to access the internals.
11510 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11511 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11512 best thing to do is set structs to BLKmode and avoid Severe Tire
11513 Damage.
11515 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11516 fit in a single GPR, whereas DI still needs two. */
11518 static bool
11519 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11521 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11522 || (TARGET_E500_DOUBLE && mode == DFmode));
11525 /* Nonzero if we can use a floating-point register to pass this arg. */
11526 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11527 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11528 && (CUM)->fregno <= FP_ARG_MAX_REG \
11529 && TARGET_HARD_FLOAT && TARGET_FPRS)
11531 /* Nonzero if we can use an AltiVec register to pass this arg. */
11532 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11533 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11534 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11535 && TARGET_ALTIVEC_ABI \
11536 && (NAMED))
11538 /* Walk down the type tree of TYPE counting consecutive base elements.
11539 If *MODEP is VOIDmode, then set it to the first valid floating point
11540 or vector type. If a non-floating point or vector type is found, or
11541 if a floating point or vector type that doesn't match a non-VOIDmode
11542 *MODEP is found, then return -1, otherwise return the count in the
11543 sub-tree. */
11545 static int
11546 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11548 machine_mode mode;
11549 HOST_WIDE_INT size;
11551 switch (TREE_CODE (type))
11553 case REAL_TYPE:
11554 mode = TYPE_MODE (type);
11555 if (!SCALAR_FLOAT_MODE_P (mode))
11556 return -1;
11558 if (*modep == VOIDmode)
11559 *modep = mode;
11561 if (*modep == mode)
11562 return 1;
11564 break;
11566 case COMPLEX_TYPE:
11567 mode = TYPE_MODE (TREE_TYPE (type));
11568 if (!SCALAR_FLOAT_MODE_P (mode))
11569 return -1;
11571 if (*modep == VOIDmode)
11572 *modep = mode;
11574 if (*modep == mode)
11575 return 2;
11577 break;
11579 case VECTOR_TYPE:
11580 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11581 return -1;
11583 /* Use V4SImode as representative of all 128-bit vector types. */
11584 size = int_size_in_bytes (type);
11585 switch (size)
11587 case 16:
11588 mode = V4SImode;
11589 break;
11590 default:
11591 return -1;
11594 if (*modep == VOIDmode)
11595 *modep = mode;
11597 /* Vector modes are considered to be opaque: two vectors are
11598 equivalent for the purposes of being homogeneous aggregates
11599 if they are the same size. */
11600 if (*modep == mode)
11601 return 1;
11603 break;
11605 case ARRAY_TYPE:
11607 int count;
11608 tree index = TYPE_DOMAIN (type);
11610 /* Can't handle incomplete types nor sizes that are not
11611 fixed. */
11612 if (!COMPLETE_TYPE_P (type)
11613 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11614 return -1;
11616 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11617 if (count == -1
11618 || !index
11619 || !TYPE_MAX_VALUE (index)
11620 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11621 || !TYPE_MIN_VALUE (index)
11622 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11623 || count < 0)
11624 return -1;
11626 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11627 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11629 /* There must be no padding. */
11630 if (wi::to_wide (TYPE_SIZE (type))
11631 != count * GET_MODE_BITSIZE (*modep))
11632 return -1;
11634 return count;
11637 case RECORD_TYPE:
11639 int count = 0;
11640 int sub_count;
11641 tree field;
11643 /* Can't handle incomplete types nor sizes that are not
11644 fixed. */
11645 if (!COMPLETE_TYPE_P (type)
11646 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11647 return -1;
11649 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11651 if (TREE_CODE (field) != FIELD_DECL)
11652 continue;
11654 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11655 if (sub_count < 0)
11656 return -1;
11657 count += sub_count;
11660 /* There must be no padding. */
11661 if (wi::to_wide (TYPE_SIZE (type))
11662 != count * GET_MODE_BITSIZE (*modep))
11663 return -1;
11665 return count;
11668 case UNION_TYPE:
11669 case QUAL_UNION_TYPE:
11671 /* These aren't very interesting except in a degenerate case. */
11672 int count = 0;
11673 int sub_count;
11674 tree field;
11676 /* Can't handle incomplete types nor sizes that are not
11677 fixed. */
11678 if (!COMPLETE_TYPE_P (type)
11679 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11680 return -1;
11682 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11684 if (TREE_CODE (field) != FIELD_DECL)
11685 continue;
11687 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11688 if (sub_count < 0)
11689 return -1;
11690 count = count > sub_count ? count : sub_count;
11693 /* There must be no padding. */
11694 if (wi::to_wide (TYPE_SIZE (type))
11695 != count * GET_MODE_BITSIZE (*modep))
11696 return -1;
11698 return count;
11701 default:
11702 break;
11705 return -1;
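/* Worked example (illustrative): for
       struct pt { double x; double y; };
   the RECORD_TYPE case sums two REAL_TYPE fields, *MODEP becomes DFmode
   and the result is 2; "double a[4]" yields 4.  Mixing element types,
   say by adding a float field, makes the modes disagree and the
   function returns -1.  */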
11708 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11709 float or vector aggregate that shall be passed in FP/vector registers
11710 according to the ELFv2 ABI, return the homogeneous element mode in
11711 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11713 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11715 static bool
11716 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11717 machine_mode *elt_mode,
11718 int *n_elts)
11720 /* Note that we do not accept complex types at the top level as
11721 homogeneous aggregates; these types are handled via the
11722 targetm.calls.split_complex_arg mechanism. Complex types
11723 can be elements of homogeneous aggregates, however. */
11724 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11726 machine_mode field_mode = VOIDmode;
11727 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11729 if (field_count > 0)
11731 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11732 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11734 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11735 up to AGGR_ARG_NUM_REG registers. */
11736 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11738 if (elt_mode)
11739 *elt_mode = field_mode;
11740 if (n_elts)
11741 *n_elts = field_count;
11742 return true;
11747 if (elt_mode)
11748 *elt_mode = mode;
11749 if (n_elts)
11750 *n_elts = 1;
11751 return false;
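/* Example (illustrative): under ELFv2,
       struct rgb { double r, g, b; };
   is discovered as a homogeneous aggregate with *ELT_MODE == DFmode and
   *N_ELTS == 3, so it can be passed in three consecutive FPRs.  A
   struct of nine doubles would need nine registers, which exceeds
   AGGR_ARG_NUM_REG, so it falls back to the ordinary aggregate rules.  */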
11754 /* Return a nonzero value to say to return the function value in
11755 memory, just as large structures are always returned. TYPE will be
11756 the data type of the value, and FNTYPE will be the type of the
11757 function doing the returning, or @code{NULL} for libcalls.
11759 The AIX ABI for the RS/6000 specifies that all structures are
11760 returned in memory. The Darwin ABI does the same.
11762 For the Darwin 64 Bit ABI, a function result can be returned in
11763 registers or in memory, depending on the size of the return data
11764 type. If it is returned in registers, the value occupies the same
11765 registers as it would if it were the first and only function
11766 argument. Otherwise, the function places its result in memory at
11767 the location pointed to by GPR3.
11769 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11770 but a draft put them in memory, and GCC used to implement the draft
11771 instead of the final standard. Therefore, aix_struct_return
11772 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11773 compatibility can change DRAFT_V4_STRUCT_RET to override the
11774 default, and -m switches get the final word. See
11775 rs6000_option_override_internal for more details.
11777 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11778 long double support is enabled. These values are returned in memory.
11780 int_size_in_bytes returns -1 for variable size objects, which go in
11781 memory always. The cast to unsigned makes -1 > 8. */
11783 static bool
11784 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11786 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11787 if (TARGET_MACHO
11788 && rs6000_darwin64_abi
11789 && TREE_CODE (type) == RECORD_TYPE
11790 && int_size_in_bytes (type) > 0)
11792 CUMULATIVE_ARGS valcum;
11793 rtx valret;
11795 valcum.words = 0;
11796 valcum.fregno = FP_ARG_MIN_REG;
11797 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11798 /* Do a trial code generation as if this were going to be passed
11799 as an argument; if any part goes in memory, we return NULL. */
11800 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11801 if (valret)
11802 return false;
11803 /* Otherwise fall through to more conventional ABI rules. */
11806 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11807 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11808 NULL, NULL))
11809 return false;
11811 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11812 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11813 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11814 return false;
11816 if (AGGREGATE_TYPE_P (type)
11817 && (aix_struct_return
11818 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11819 return true;
11821 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11822 modes only exist for GCC vector types if -maltivec. */
11823 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11824 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11825 return false;
11827 /* Return synthetic vectors in memory. */
11828 if (TREE_CODE (type) == VECTOR_TYPE
11829 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11831 static bool warned_for_return_big_vectors = false;
11832 if (!warned_for_return_big_vectors)
11834 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11835 "non-standard ABI extension with no compatibility guarantee");
11836 warned_for_return_big_vectors = true;
11838 return true;
11841 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11842 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11843 return true;
11845 return false;
11848 /* Specify whether values returned in registers should be at the most
11849 significant end of a register. We want aggregates returned by
11850 value to match the way aggregates are passed to functions. */
11852 static bool
11853 rs6000_return_in_msb (const_tree valtype)
11855 return (DEFAULT_ABI == ABI_ELFv2
11856 && BYTES_BIG_ENDIAN
11857 && AGGREGATE_TYPE_P (valtype)
11858 && rs6000_function_arg_padding (TYPE_MODE (valtype),
11859 valtype) == PAD_UPWARD);
11862 #ifdef HAVE_AS_GNU_ATTRIBUTE
11863 /* Return TRUE if a call to function FNDECL may be one that
11864 potentially affects the function calling ABI of the object file. */
11866 static bool
11867 call_ABI_of_interest (tree fndecl)
11869 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11871 struct cgraph_node *c_node;
11873 /* Libcalls are always interesting. */
11874 if (fndecl == NULL_TREE)
11875 return true;
11877 /* Any call to an external function is interesting. */
11878 if (DECL_EXTERNAL (fndecl))
11879 return true;
11881 /* Interesting functions that we are emitting in this object file. */
11882 c_node = cgraph_node::get (fndecl);
11883 c_node = c_node->ultimate_alias_target ();
11884 return !c_node->only_called_directly_p ();
11886 return false;
11888 #endif
11890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11891 for a call to a function whose data type is FNTYPE.
11892 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11894 For incoming args we set the number of arguments in the prototype large
11895 so we never return a PARALLEL. */
11897 void
11898 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11899 rtx libname ATTRIBUTE_UNUSED, int incoming,
11900 int libcall, int n_named_args,
11901 tree fndecl ATTRIBUTE_UNUSED,
11902 machine_mode return_mode ATTRIBUTE_UNUSED)
11904 static CUMULATIVE_ARGS zero_cumulative;
11906 *cum = zero_cumulative;
11907 cum->words = 0;
11908 cum->fregno = FP_ARG_MIN_REG;
11909 cum->vregno = ALTIVEC_ARG_MIN_REG;
11910 cum->prototype = (fntype && prototype_p (fntype));
11911 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11912 ? CALL_LIBCALL : CALL_NORMAL);
11913 cum->sysv_gregno = GP_ARG_MIN_REG;
11914 cum->stdarg = stdarg_p (fntype);
11915 cum->libcall = libcall;
11917 cum->nargs_prototype = 0;
11918 if (incoming || cum->prototype)
11919 cum->nargs_prototype = n_named_args;
11921 /* Check for a longcall attribute. */
11922 if ((!fntype && rs6000_default_long_calls)
11923 || (fntype
11924 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11925 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11926 cum->call_cookie |= CALL_LONG;
11928 if (TARGET_DEBUG_ARG)
11930 fprintf (stderr, "\ninit_cumulative_args:");
11931 if (fntype)
11933 tree ret_type = TREE_TYPE (fntype);
11934 fprintf (stderr, " ret code = %s,",
11935 get_tree_code_name (TREE_CODE (ret_type)));
11938 if (cum->call_cookie & CALL_LONG)
11939 fprintf (stderr, " longcall,");
11941 fprintf (stderr, " proto = %d, nargs = %d\n",
11942 cum->prototype, cum->nargs_prototype);
11945 #ifdef HAVE_AS_GNU_ATTRIBUTE
11946 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11948 cum->escapes = call_ABI_of_interest (fndecl);
11949 if (cum->escapes)
11951 tree return_type;
11953 if (fntype)
11955 return_type = TREE_TYPE (fntype);
11956 return_mode = TYPE_MODE (return_type);
11958 else
11959 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11961 if (return_type != NULL)
11963 if (TREE_CODE (return_type) == RECORD_TYPE
11964 && TYPE_TRANSPARENT_AGGR (return_type))
11966 return_type = TREE_TYPE (first_field (return_type));
11967 return_mode = TYPE_MODE (return_type);
11969 if (AGGREGATE_TYPE_P (return_type)
11970 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11971 <= 8))
11972 rs6000_returns_struct = true;
11974 if (SCALAR_FLOAT_MODE_P (return_mode))
11976 rs6000_passes_float = true;
11977 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11978 && (FLOAT128_IBM_P (return_mode)
11979 || FLOAT128_IEEE_P (return_mode)
11980 || (return_type != NULL
11981 && (TYPE_MAIN_VARIANT (return_type)
11982 == long_double_type_node))))
11983 rs6000_passes_long_double = true;
11985 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11986 || SPE_VECTOR_MODE (return_mode))
11987 rs6000_passes_vector = true;
11990 #endif
11992 if (fntype
11993 && !TARGET_ALTIVEC
11994 && TARGET_ALTIVEC_ABI
11995 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11997 error ("cannot return value in vector register because"
11998 " altivec instructions are disabled, use -maltivec"
11999 " to enable them");
12003 /* The mode the ABI uses for a word. This is not the same as word_mode
12004 for -m32 -mpowerpc64. This is used to implement various target hooks. */
12006 static scalar_int_mode
12007 rs6000_abi_word_mode (void)
12009 return TARGET_32BIT ? SImode : DImode;
12012 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
12013 static char *
12014 rs6000_offload_options (void)
12016 if (TARGET_64BIT)
12017 return xstrdup ("-foffload-abi=lp64");
12018 else
12019 return xstrdup ("-foffload-abi=ilp32");
12022 /* On rs6000, function arguments are promoted, as are function return
12023 values. */
12025 static machine_mode
12026 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
12027 machine_mode mode,
12028 int *punsignedp ATTRIBUTE_UNUSED,
12029 const_tree, int)
12031 PROMOTE_MODE (mode, *punsignedp, type);
12033 return mode;
12036 /* Return true if TYPE must be passed on the stack and not in registers. */
12038 static bool
12039 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
12041 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
12042 return must_pass_in_stack_var_size (mode, type);
12043 else
12044 return must_pass_in_stack_var_size_or_pad (mode, type);
12047 static inline bool
12048 is_complex_IBM_long_double (machine_mode mode)
12050 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
12053 /* Whether ABI_V4 passes MODE args to a function in floating point
12054 registers. */
12056 static bool
12057 abi_v4_pass_in_fpr (machine_mode mode)
12059 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
12060 return false;
12061 if (TARGET_SINGLE_FLOAT && mode == SFmode)
12062 return true;
12063 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
12064 return true;
12065 /* ABI_V4 passes complex IBM long double in 8 gprs.
12066 Stupid, but we can't change the ABI now. */
12067 if (is_complex_IBM_long_double (mode))
12068 return false;
12069 if (FLOAT128_2REG_P (mode))
12070 return true;
12071 if (DECIMAL_FLOAT_MODE_P (mode))
12072 return true;
12073 return false;
12076 /* Implement TARGET_FUNCTION_ARG_PADDING
12078 For the AIX ABI structs are always stored left shifted in their
12079 argument slot. */
12081 static pad_direction
12082 rs6000_function_arg_padding (machine_mode mode, const_tree type)
12084 #ifndef AGGREGATE_PADDING_FIXED
12085 #define AGGREGATE_PADDING_FIXED 0
12086 #endif
12087 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
12088 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
12089 #endif
12091 if (!AGGREGATE_PADDING_FIXED)
12093 /* GCC used to pass structures of the same size as integer types as
12094 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
12095 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
12096 passed padded downward, except that -mstrict-align further
12097 muddied the water in that multi-component structures of 2 and 4
12098 bytes in size were passed padded upward.
12100 The following arranges for best compatibility with previous
12101 versions of gcc, but removes the -mstrict-align dependency. */
12102 if (BYTES_BIG_ENDIAN)
12104 HOST_WIDE_INT size = 0;
12106 if (mode == BLKmode)
12108 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
12109 size = int_size_in_bytes (type);
12111 else
12112 size = GET_MODE_SIZE (mode);
12114 if (size == 1 || size == 2 || size == 4)
12115 return PAD_DOWNWARD;
12117 return PAD_UPWARD;
12120 if (AGGREGATES_PAD_UPWARD_ALWAYS)
12122 if (type != 0 && AGGREGATE_TYPE_P (type))
12123 return PAD_UPWARD;
12126 /* Fall back to the default. */
12127 return default_function_arg_padding (mode, type);
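/* Example (illustrative): on a big-endian target a 3-byte BLKmode
   struct has size 3, which is none of 1, 2 or 4, so it is padded
   upward (left-justified in its slot), while a 2-byte struct is padded
   downward to match the old pass-as-integer behavior described
   above.  */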
12130 /* If defined, a C expression that gives the alignment boundary, in bits,
12131 of an argument with the specified mode and type. If it is not defined,
12132 PARM_BOUNDARY is used for all arguments.
12134 V.4 wants long longs and doubles to be double word aligned. Just
12135 testing the mode size is a boneheaded way to do this as it means
12136 that other types such as complex int are also double word aligned.
12137 However, we're stuck with this because changing the ABI might break
12138 existing library interfaces.
12140 Doubleword align SPE vectors.
12141 Quadword align Altivec/VSX vectors.
12142 Quadword align large synthetic vector types. */
12144 static unsigned int
12145 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
12147 machine_mode elt_mode;
12148 int n_elts;
12150 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12152 if (DEFAULT_ABI == ABI_V4
12153 && (GET_MODE_SIZE (mode) == 8
12154 || (TARGET_HARD_FLOAT
12155 && TARGET_FPRS
12156 && !is_complex_IBM_long_double (mode)
12157 && FLOAT128_2REG_P (mode))))
12158 return 64;
12159 else if (FLOAT128_VECTOR_P (mode))
12160 return 128;
12161 else if (SPE_VECTOR_MODE (mode)
12162 || (type && TREE_CODE (type) == VECTOR_TYPE
12163 && int_size_in_bytes (type) >= 8
12164 && int_size_in_bytes (type) < 16))
12165 return 64;
12166 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12167 || (type && TREE_CODE (type) == VECTOR_TYPE
12168 && int_size_in_bytes (type) >= 16))
12169 return 128;
12171 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12172 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12173 -mcompat-align-parm is used. */
12174 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
12175 || DEFAULT_ABI == ABI_ELFv2)
12176 && type && TYPE_ALIGN (type) > 64)
12178 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12179 or homogeneous float/vector aggregates here. We already handled
12180 vector aggregates above, but still need to check for float here. */
12181 bool aggregate_p = (AGGREGATE_TYPE_P (type)
12182 && !SCALAR_FLOAT_MODE_P (elt_mode));
12184 /* We used to check for BLKmode instead of the above aggregate type
12185 check. Warn when this results in any difference to the ABI. */
12186 if (aggregate_p != (mode == BLKmode))
12188 static bool warned;
12189 if (!warned && warn_psabi)
12191 warned = true;
12192 inform (input_location,
12193 "the ABI of passing aggregates with %d-byte alignment"
12194 " has changed in GCC 5",
12195 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
12199 if (aggregate_p)
12200 return 128;
12203 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12204 implement the "aggregate type" check as a BLKmode check here; this
12205 means certain aggregate types are in fact not aligned. */
12206 if (TARGET_MACHO && rs6000_darwin64_abi
12207 && mode == BLKmode
12208 && type && TYPE_ALIGN (type) > 64)
12209 return 128;
12211 return PARM_BOUNDARY;
12214 /* The offset in words to the start of the parameter save area. */
12216 static unsigned int
12217 rs6000_parm_offset (void)
12219 return (DEFAULT_ABI == ABI_V4 ? 2
12220 : DEFAULT_ABI == ABI_ELFv2 ? 4
12221 : 6);
12224 /* For a function parm of MODE and TYPE, return the starting word in
12225 the parameter area. NWORDS of the parameter area are already used. */
12227 static unsigned int
12228 rs6000_parm_start (machine_mode mode, const_tree type,
12229 unsigned int nwords)
12231 unsigned int align;
12233 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
12234 return nwords + (-(rs6000_parm_offset () + nwords) & align);
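/* Worked example (illustrative): with -m64 ELFv2, PARM_BOUNDARY is 64,
   so a 16-byte-aligned vector argument gives align == 128/64 - 1 == 1.
   With rs6000_parm_offset () == 4 and nwords == 1 already used, the
   result is 1 + (-(4 + 1) & 1) == 2, i.e. the argument is bumped to
   the next even doubleword of the parameter save area.  */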
12237 /* Compute the size (in words) of a function argument. */
12239 static unsigned long
12240 rs6000_arg_size (machine_mode mode, const_tree type)
12242 unsigned long size;
12244 if (mode != BLKmode)
12245 size = GET_MODE_SIZE (mode);
12246 else
12247 size = int_size_in_bytes (type);
12249 if (TARGET_32BIT)
12250 return (size + 3) >> 2;
12251 else
12252 return (size + 7) >> 3;
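/* E.g. (illustrative) a 9-byte BLKmode struct occupies
   (9 + 7) >> 3 == 2 doublewords with -m64 but (9 + 3) >> 2 == 3 words
   with -m32.  */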
12255 /* Use this to flush pending int fields. */
12257 static void
12258 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12259 HOST_WIDE_INT bitpos, int final)
12261 unsigned int startbit, endbit;
12262 int intregs, intoffset;
12264 /* Handle the situations where a float is taking up the first half
12265 of the GPR, and the other half is empty (typically due to
12266 alignment restrictions). We can detect this by an 8-byte-aligned
12267 int field, or by seeing that this is the final flush for this
12268 argument. Count the word and continue on. */
12269 if (cum->floats_in_gpr == 1
12270 && (cum->intoffset % 64 == 0
12271 || (cum->intoffset == -1 && final)))
12273 cum->words++;
12274 cum->floats_in_gpr = 0;
12277 if (cum->intoffset == -1)
12278 return;
12280 intoffset = cum->intoffset;
12281 cum->intoffset = -1;
12282 cum->floats_in_gpr = 0;
12284 if (intoffset % BITS_PER_WORD != 0)
12286 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12287 if (!int_mode_for_size (bits, 0).exists ())
12289 /* We couldn't find an appropriate mode, which happens,
12290 e.g., in packed structs when there are 3 bytes to load.
12291 Back intoffset back to the beginning of the word in this
12292 case. */
12293 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12297 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12298 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12299 intregs = (endbit - startbit) / BITS_PER_WORD;
12300 cum->words += intregs;
12301 /* words should be unsigned. */
12302 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12304 int pad = (endbit/BITS_PER_WORD) - cum->words;
12305 cum->words += pad;
12309 /* The darwin64 ABI calls for us to recurse down through structs,
12310 looking for elements passed in registers. Unfortunately, we have
12311 to track int register count here also because of misalignments
12312 in powerpc alignment mode. */
12314 static void
12315 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12316 const_tree type,
12317 HOST_WIDE_INT startbitpos)
12319 tree f;
12321 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12322 if (TREE_CODE (f) == FIELD_DECL)
12324 HOST_WIDE_INT bitpos = startbitpos;
12325 tree ftype = TREE_TYPE (f);
12326 machine_mode mode;
12327 if (ftype == error_mark_node)
12328 continue;
12329 mode = TYPE_MODE (ftype);
12331 if (DECL_SIZE (f) != 0
12332 && tree_fits_uhwi_p (bit_position (f)))
12333 bitpos += int_bit_position (f);
12335 /* ??? FIXME: else assume zero offset. */
12337 if (TREE_CODE (ftype) == RECORD_TYPE)
12338 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12339 else if (USE_FP_FOR_ARG_P (cum, mode))
12341 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12342 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12343 cum->fregno += n_fpregs;
12344 /* Single-precision floats present a special problem for
12345 us, because they are smaller than an 8-byte GPR, and so
12346 the structure-packing rules combined with the standard
12347 varargs behavior mean that we want to pack float/float
12348 and float/int combinations into a single register's
12349 space. This is complicated by the arg advance flushing,
12350 which works on arbitrarily large groups of int-type
12351 fields. */
12352 if (mode == SFmode)
12354 if (cum->floats_in_gpr == 1)
12356 /* Two floats in a word; count the word and reset
12357 the float count. */
12358 cum->words++;
12359 cum->floats_in_gpr = 0;
12361 else if (bitpos % 64 == 0)
12363 /* A float at the beginning of an 8-byte word;
12364 count it and put off adjusting cum->words until
12365 we see if an arg advance flush is going to do it
12366 for us. */
12367 cum->floats_in_gpr++;
12369 else
12371 /* The float is at the end of a word, preceded
12372 by integer fields, so the arg advance flush
12373 just above has already set cum->words and
12374 everything is taken care of. */
12377 else
12378 cum->words += n_fpregs;
12380 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12382 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12383 cum->vregno++;
12384 cum->words += 2;
12386 else if (cum->intoffset == -1)
12387 cum->intoffset = bitpos;
12391 /* Check for an item that needs to be considered specially under the darwin 64
12392 bit ABI. These are record types where the mode is BLK or the structure is
12393 8 bytes in size. */
12394 static int
12395 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12397 return rs6000_darwin64_abi
12398 && ((mode == BLKmode
12399 && TREE_CODE (type) == RECORD_TYPE
12400 && int_size_in_bytes (type) > 0)
12401 || (type && TREE_CODE (type) == RECORD_TYPE
12402 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12405 /* Update the data in CUM to advance over an argument
12406 of mode MODE and data type TYPE.
12407 (TYPE is null for libcalls where that information may not be available.)
12409 Note that for args passed by reference, function_arg will be called
12410 with MODE and TYPE set to that of the pointer to the arg, not the arg
12411 itself. */
12413 static void
12414 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12415 const_tree type, bool named, int depth)
12417 machine_mode elt_mode;
12418 int n_elts;
12420 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12422 /* Only tick off an argument if we're not recursing. */
12423 if (depth == 0)
12424 cum->nargs_prototype--;
12426 #ifdef HAVE_AS_GNU_ATTRIBUTE
12427 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12428 && cum->escapes)
12430 if (SCALAR_FLOAT_MODE_P (mode))
12432 rs6000_passes_float = true;
12433 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12434 && (FLOAT128_IBM_P (mode)
12435 || FLOAT128_IEEE_P (mode)
12436 || (type != NULL
12437 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12438 rs6000_passes_long_double = true;
12440 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12441 || (SPE_VECTOR_MODE (mode)
12442 && !cum->stdarg
12443 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12444 rs6000_passes_vector = true;
12446 #endif
12448 if (TARGET_ALTIVEC_ABI
12449 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12450 || (type && TREE_CODE (type) == VECTOR_TYPE
12451 && int_size_in_bytes (type) == 16)))
12453 bool stack = false;
12455 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12457 cum->vregno += n_elts;
12459 if (!TARGET_ALTIVEC)
12460 error ("cannot pass argument in vector register because"
12461 " altivec instructions are disabled, use -maltivec"
12462 " to enable them");
12464 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12465 even if it is going to be passed in a vector register.
12466 Darwin does the same for variable-argument functions. */
12467 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12468 && TARGET_64BIT)
12469 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12470 stack = true;
12472 else
12473 stack = true;
12475 if (stack)
12477 int align;
12479 /* Vector parameters must be 16-byte aligned. In 32-bit
12480 mode this means we need to take into account the offset
12481 to the parameter save area. In 64-bit mode, they just
12482 have to start on an even word, since the parameter save
12483 area is 16-byte aligned. */
12484 if (TARGET_32BIT)
12485 align = -(rs6000_parm_offset () + cum->words) & 3;
12486 else
12487 align = cum->words & 1;
12488 cum->words += align + rs6000_arg_size (mode, type);
12490 if (TARGET_DEBUG_ARG)
12492 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12493 cum->words, align);
12494 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12495 cum->nargs_prototype, cum->prototype,
12496 GET_MODE_NAME (mode));
12500 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12501 && !cum->stdarg
12502 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12503 cum->sysv_gregno++;
12505 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12507 int size = int_size_in_bytes (type);
12508 /* Variable sized types have size == -1 and are
12509 treated as if consisting entirely of ints.
12510 Pad to 16 byte boundary if needed. */
12511 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12512 && (cum->words % 2) != 0)
12513 cum->words++;
12514 /* For varargs, we can just go up by the size of the struct. */
12515 if (!named)
12516 cum->words += (size + 7) / 8;
12517 else
12519 /* It is tempting to say int register count just goes up by
12520 sizeof(type)/8, but this is wrong in a case such as
12521 { int; double; int; } [powerpc alignment]. We have to
12522 grovel through the fields for these too. */
12523 cum->intoffset = 0;
12524 cum->floats_in_gpr = 0;
12525 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12526 rs6000_darwin64_record_arg_advance_flush (cum,
12527 size * BITS_PER_UNIT, 1);
12529 if (TARGET_DEBUG_ARG)
12531 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
12532 cum->words, TYPE_ALIGN (type), size);
12533 fprintf (stderr,
12534 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12535 cum->nargs_prototype, cum->prototype,
12536 GET_MODE_NAME (mode));
12539 else if (DEFAULT_ABI == ABI_V4)
12541 if (abi_v4_pass_in_fpr (mode))
12543 /* _Decimal128 must use an even/odd register pair. This assumes
12544 that the register number is odd when fregno is odd. */
12545 if (mode == TDmode && (cum->fregno % 2) == 1)
12546 cum->fregno++;
12548 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12549 <= FP_ARG_V4_MAX_REG)
12550 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12551 else
12553 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12554 if (mode == DFmode || FLOAT128_IBM_P (mode)
12555 || mode == DDmode || mode == TDmode)
12556 cum->words += cum->words & 1;
12557 cum->words += rs6000_arg_size (mode, type);
12560 else
12562 int n_words = rs6000_arg_size (mode, type);
12563 int gregno = cum->sysv_gregno;
12565 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12566 (r7,r8) or (r9,r10). So is any other 2 word item such
12567 as complex int, due to a historical mistake. */
12568 if (n_words == 2)
12569 gregno += (1 - gregno) & 1;
12571 /* Multi-reg args are not split between registers and stack. */
12572 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12574 /* Long long and SPE vectors are aligned on the stack.
12575 So are other 2 word items such as complex int due to
12576 a historical mistake. */
12577 if (n_words == 2)
12578 cum->words += cum->words & 1;
12579 cum->words += n_words;
12582 /* Note: we keep accumulating gregno even after we have started
12583 spilling to the stack; expand_builtin_saveregs relies on this
12584 to detect that spilling has begun. */
12585 cum->sysv_gregno = gregno + n_words;
12588 if (TARGET_DEBUG_ARG)
12590 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12591 cum->words, cum->fregno);
12592 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12593 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12594 fprintf (stderr, "mode = %4s, named = %d\n",
12595 GET_MODE_NAME (mode), named);
12598 else
12600 int n_words = rs6000_arg_size (mode, type);
12601 int start_words = cum->words;
12602 int align_words = rs6000_parm_start (mode, type, start_words);
12604 cum->words = align_words + n_words;
12606 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12608 /* _Decimal128 must be passed in an even/odd float register pair.
12609 This assumes that the register number is odd when fregno is
12610 odd. */
12611 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12612 cum->fregno++;
12613 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12616 if (TARGET_DEBUG_ARG)
12618 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12619 cum->words, cum->fregno);
12620 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12621 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12622 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12623 named, align_words - start_words, depth);
12628 static void
12629 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12630 const_tree type, bool named)
12632 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12633 0);
12636 static rtx
12637 spe_build_register_parallel (machine_mode mode, int gregno)
12639 rtx r1, r3, r5, r7;
12641 switch (mode)
12643 case E_DFmode:
12644 r1 = gen_rtx_REG (DImode, gregno);
12645 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12646 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12648 case E_DCmode:
12649 case E_TFmode:
12650 r1 = gen_rtx_REG (DImode, gregno);
12651 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12652 r3 = gen_rtx_REG (DImode, gregno + 2);
12653 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12654 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12656 case E_TCmode:
12657 r1 = gen_rtx_REG (DImode, gregno);
12658 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12659 r3 = gen_rtx_REG (DImode, gregno + 2);
12660 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12661 r5 = gen_rtx_REG (DImode, gregno + 4);
12662 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12663 r7 = gen_rtx_REG (DImode, gregno + 6);
12664 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12665 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12667 default:
12668 gcc_unreachable ();
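/* Illustrative example: for DCmode starting at gregno == 5 this builds
       (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                     (expr_list (reg:DI 7) (const_int 8))])
   i.e. each 8-byte half of the value is described by a DImode GPR, with
   register numbers advancing by two to match the e500 pair-of-GPRs
   layout for doubles.  */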
12672 /* Determine where to put a SIMD argument on the SPE. */
12673 static rtx
12674 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12675 const_tree type)
12677 int gregno = cum->sysv_gregno;
12679 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12680 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
12681 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12682 || mode == DCmode || mode == TCmode))
12684 int n_words = rs6000_arg_size (mode, type);
12686 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12687 if (mode == DFmode)
12688 gregno += (1 - gregno) & 1;
12690 /* Multi-reg args are not split between registers and stack. */
12691 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12692 return NULL_RTX;
12694 return spe_build_register_parallel (mode, gregno);
12696 if (cum->stdarg)
12698 int n_words = rs6000_arg_size (mode, type);
12700 /* SPE vectors are put in odd registers. */
12701 if (n_words == 2 && (gregno & 1) == 0)
12702 gregno += 1;
12704 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12706 rtx r1, r2;
12707 machine_mode m = SImode;
12709 r1 = gen_rtx_REG (m, gregno);
12710 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12711 r2 = gen_rtx_REG (m, gregno + 1);
12712 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12713 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12715 else
12716 return NULL_RTX;
12718 else
12720 if (gregno <= GP_ARG_MAX_REG)
12721 return gen_rtx_REG (mode, gregno);
12722 else
12723 return NULL_RTX;
12727 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12728 structure between cum->intoffset and bitpos to integer registers. */
12730 static void
12731 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12732 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12734 machine_mode mode;
12735 unsigned int regno;
12736 unsigned int startbit, endbit;
12737 int this_regno, intregs, intoffset;
12738 rtx reg;
12740 if (cum->intoffset == -1)
12741 return;
12743 intoffset = cum->intoffset;
12744 cum->intoffset = -1;
12746 /* If this is the trailing part of a word, try to only load that
12747 much into the register. Otherwise load the whole register. Note
12748 that in the latter case we may pick up unwanted bits. It's not a
12749 problem at the moment, but we may wish to revisit this. */
12751 if (intoffset % BITS_PER_WORD != 0)
12753 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12754 if (!int_mode_for_size (bits, 0).exists (&mode))
12756 /* We couldn't find an appropriate mode, which happens,
12757 e.g., in packed structs when there are 3 bytes to load.
12758 Back intoffset back to the beginning of the word in this
12759 case. */
12760 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12761 mode = word_mode;
12764 else
12765 mode = word_mode;
12767 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12768 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12769 intregs = (endbit - startbit) / BITS_PER_WORD;
12770 this_regno = cum->words + intoffset / BITS_PER_WORD;
12772 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12773 cum->use_stack = 1;
12775 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12776 if (intregs <= 0)
12777 return;
12779 intoffset /= BITS_PER_UNIT;
12782 regno = GP_ARG_MIN_REG + this_regno;
12783 reg = gen_rtx_REG (mode, regno);
12784 rvec[(*k)++] =
12785 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12787 this_regno += 1;
12788 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; /* advance to the next word boundary */
12789 mode = word_mode;
12790 intregs -= 1;
12792 while (intregs > 0);
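/* Worked example (an illustration, not from the original file): with
   BITS_PER_WORD == 64 and intoffset 40 bits into the current word, the
   code above asks for a 24-bit integer mode; no such mode exists, so
   intoffset is rounded down to the word start and a full word_mode
   register is used, possibly picking up bits that precede the integer
   data.  */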
12795 /* Recursive workhorse for the following. */
12797 static void
12798 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12799 HOST_WIDE_INT startbitpos, rtx rvec[],
12800 int *k)
12802 tree f;
12804 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12805 if (TREE_CODE (f) == FIELD_DECL)
12807 HOST_WIDE_INT bitpos = startbitpos;
12808 tree ftype = TREE_TYPE (f);
12809 machine_mode mode;
12810 if (ftype == error_mark_node)
12811 continue;
12812 mode = TYPE_MODE (ftype);
12814 if (DECL_SIZE (f) != 0
12815 && tree_fits_uhwi_p (bit_position (f)))
12816 bitpos += int_bit_position (f);
12818 /* ??? FIXME: else assume zero offset. */
12820 if (TREE_CODE (ftype) == RECORD_TYPE)
12821 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12822 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12824 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12825 #if 0
12826 switch (mode)
12828 case E_SCmode: mode = SFmode; break;
12829 case E_DCmode: mode = DFmode; break;
12830 case E_TCmode: mode = TFmode; break;
12831 default: break;
12833 #endif
12834 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12835 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12837 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12838 && (mode == TFmode || mode == TDmode));
12839 /* Long double or _Decimal128 split over regs and memory. */
12840 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12841 cum->use_stack = 1;
12843 rvec[(*k)++]
12844 = gen_rtx_EXPR_LIST (VOIDmode,
12845 gen_rtx_REG (mode, cum->fregno++),
12846 GEN_INT (bitpos / BITS_PER_UNIT));
12847 if (FLOAT128_2REG_P (mode))
12848 cum->fregno++;
12850 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12852 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12853 rvec[(*k)++]
12854 = gen_rtx_EXPR_LIST (VOIDmode,
12855 gen_rtx_REG (mode, cum->vregno++),
12856 GEN_INT (bitpos / BITS_PER_UNIT));
12858 else if (cum->intoffset == -1)
12859 cum->intoffset = bitpos;
12863 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12864 the register(s) to be used for each field and subfield of a struct
12865 being passed by value, along with the offset of where the
12866 register's value may be found in the block. FP fields go in FP
12867 registers, vector fields go in vector registers, and everything
12868 else goes in int registers, packed as in memory.
12870 This code is also used for function return values. RETVAL indicates
12871 whether this is the case.
12873 Much of this is taken from the SPARC V9 port, which has a similar
12874 calling convention. */
12876 static rtx
12877 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12878 bool named, bool retval)
12880 rtx rvec[FIRST_PSEUDO_REGISTER];
12881 int k = 1, kbase = 1;
12882 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12883 /* This is a copy; modifications are not visible to our caller. */
12884 CUMULATIVE_ARGS copy_cum = *orig_cum;
12885 CUMULATIVE_ARGS *cum = &copy_cum;
12887 /* Pad to 16 byte boundary if needed. */
12888 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12889 && (cum->words % 2) != 0)
12890 cum->words++;
12892 cum->intoffset = 0;
12893 cum->use_stack = 0;
12894 cum->named = named;
12896 /* Put entries into rvec[] for individual FP and vector fields, and
12897 for the chunks of memory that go in int regs. Note we start at
12898 element 1; 0 is reserved for an indication of using memory, and
12899 may or may not be filled in below. */
12900 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12901 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12903 /* If any part of the struct went on the stack put all of it there.
12904 This hack is because the generic code for
12905 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12906 parts of the struct are not at the beginning. */
12907 if (cum->use_stack)
12909 if (retval)
12910 return NULL_RTX; /* doesn't go in registers at all */
12911 kbase = 0;
12912 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12914 if (k > 1 || cum->use_stack)
12915 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12916 else
12917 return NULL_RTX;
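/* Illustrative sketch (a simplified assumption): for
   "struct { double d; int i; }" passed by value under darwin64, the
   recursion and flush above build roughly

     (parallel:BLK [(expr_list (reg:DF fN) (const_int 0))
                    (expr_list (reg:DI rM) (const_int 8))])

   the double in the next free FPR and the trailing int covered by a
   GPR chunk.  Had any part spilled, element 0 would instead be the
   magic (expr_list (nil) (const_int 0)) memory marker.  */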
12920 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12922 static rtx
12923 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12924 int align_words)
12926 int n_units;
12927 int i, k;
12928 rtx rvec[GP_ARG_NUM_REG + 1];
12930 if (align_words >= GP_ARG_NUM_REG)
12931 return NULL_RTX;
12933 n_units = rs6000_arg_size (mode, type);
12935 /* Optimize the simple case where the arg fits in one gpr, except in
12936 the case of BLKmode due to assign_parms assuming that registers are
12937 BITS_PER_WORD wide. */
12938 if (n_units == 0
12939 || (n_units == 1 && mode != BLKmode))
12940 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12942 k = 0;
12943 if (align_words + n_units > GP_ARG_NUM_REG)
12944 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12945 using a magic NULL_RTX component.
12946 This is not strictly correct. Only some of the arg belongs in
12947 memory, not all of it. However, the normal scheme using
12948 function_arg_partial_nregs can result in unusual subregs, eg.
12949 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12950 store the whole arg to memory is often more efficient than code
12951 to store pieces, and we know that space is available in the right
12952 place for the whole arg. */
12953 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12955 i = 0;
12958 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12959 rtx off = GEN_INT (i++ * 4);
12960 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12962 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12964 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
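/* Usage sketch (illustrative assumption): with TARGET_32BIT &&
   TARGET_POWERPC64, a DFmode argument whose first word lands in the
   last GPR (align_words == 7) comes back as

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   half in r10 and half in memory, instead of the ill-handled
   (subreg:SI (reg:DF) 4) that function_arg_partial_nregs would
   imply.  */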
12967 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12968 but must also be copied into the parameter save area starting at
12969 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12970 to the GPRs and/or memory. Return the number of elements used. */
12972 static int
12973 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12974 int align_words, rtx *rvec)
12976 int k = 0;
12978 if (align_words < GP_ARG_NUM_REG)
12980 int n_words = rs6000_arg_size (mode, type);
12982 if (align_words + n_words > GP_ARG_NUM_REG
12983 || mode == BLKmode
12984 || (TARGET_32BIT && TARGET_POWERPC64))
12986 /* If this is partially on the stack, then we only
12987 include the portion actually in registers here. */
12988 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12989 int i = 0;
12991 if (align_words + n_words > GP_ARG_NUM_REG)
12993 /* Not all of the arg fits in gprs. Say that it goes in memory
12994 too, using a magic NULL_RTX component. Also see comment in
12995 rs6000_mixed_function_arg for why the normal
12996 function_arg_partial_nregs scheme doesn't work in this case. */
12997 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
13002 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13003 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
13004 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13006 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13008 else
13010 /* The whole arg fits in gprs. */
13011 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13012 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
13015 else
13017 /* It's entirely in memory. */
13018 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
13021 return k;
13024 /* RVEC is a vector of K components of an argument of mode MODE.
13025 Construct the final function_arg return value from it. */
13027 static rtx
13028 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
13030 gcc_assert (k >= 1);
13032 /* Avoid returning a PARALLEL in the trivial cases. */
13033 if (k == 1)
13035 if (XEXP (rvec[0], 0) == NULL_RTX)
13036 return NULL_RTX;
13038 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
13039 return XEXP (rvec[0], 0);
13042 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
13045 /* Determine where to put an argument to a function.
13046 Value is zero to push the argument on the stack,
13047 or a hard register in which to store the argument.
13049 MODE is the argument's machine mode.
13050 TYPE is the data type of the argument (as a tree).
13051 This is null for libcalls where that information may
13052 not be available.
13053 CUM is a variable of type CUMULATIVE_ARGS which gives info about
13054 the preceding args and about the function being called. It is
13055 not modified in this routine.
13056 NAMED is nonzero if this argument is a named parameter
13057 (otherwise it is an extra parameter matching an ellipsis).
13059 On RS/6000 the first eight words of non-FP are normally in registers
13060 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
13061 Under V.4, the first 8 FP args are in registers.
13063 If this is floating-point and no prototype is specified, we use
13064 both an FP and integer register (or possibly FP reg and stack). Library
13065 functions (when CALL_LIBCALL is set) always have the proper types for args,
13066 so we can pass the FP value just in one register. emit_library_function
13067 doesn't support PARALLEL anyway.
13069 Note that for args passed by reference, function_arg will be called
13070 with MODE and TYPE set to that of the pointer to the arg, not the arg
13071 itself. */
13073 static rtx
13074 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
13075 const_tree type, bool named)
13077 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13078 enum rs6000_abi abi = DEFAULT_ABI;
13079 machine_mode elt_mode;
13080 int n_elts;
13082 /* Return a marker to indicate whether CR1 needs to set or clear the
13083 bit that V.4 uses to say fp args were passed in registers.
13084 Assume that we don't need the marker for software floating point,
13085 or compiler generated library calls. */
13086 if (mode == VOIDmode)
13088 if (abi == ABI_V4
13089 && (cum->call_cookie & CALL_LIBCALL) == 0
13090 && (cum->stdarg
13091 || (cum->nargs_prototype < 0
13092 && (cum->prototype || TARGET_NO_PROTOTYPE))))
13094 /* For the SPE, we need to crxor CR6 always. */
13095 if (TARGET_SPE_ABI)
13096 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
13097 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
13098 return GEN_INT (cum->call_cookie
13099 | ((cum->fregno == FP_ARG_MIN_REG)
13100 ? CALL_V4_SET_FP_ARGS
13101 : CALL_V4_CLEAR_FP_ARGS));
13104 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
13107 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13109 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13111 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
13112 if (rslt != NULL_RTX)
13113 return rslt;
13114 /* Else fall through to usual handling. */
13117 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13119 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13120 rtx r, off;
13121 int i, k = 0;
13123 /* Do we also need to pass this argument in the parameter save area?
13124 Library support functions for IEEE 128-bit are assumed to not need the
13125 value passed both in GPRs and in vector registers. */
13126 if (TARGET_64BIT && !cum->prototype
13127 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13129 int align_words = ROUND_UP (cum->words, 2);
13130 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13133 /* Describe where this argument goes in the vector registers. */
13134 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
13136 r = gen_rtx_REG (elt_mode, cum->vregno + i);
13137 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13138 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13141 return rs6000_finish_function_arg (mode, rvec, k);
13143 else if (TARGET_ALTIVEC_ABI
13144 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
13145 || (type && TREE_CODE (type) == VECTOR_TYPE
13146 && int_size_in_bytes (type) == 16)))
13148 if (named || abi == ABI_V4)
13149 return NULL_RTX;
13150 else
13152 /* Vector parameters to varargs functions under AIX or Darwin
13153 get passed in memory and possibly also in GPRs. */
13154 int align, align_words, n_words;
13155 machine_mode part_mode;
13157 /* Vector parameters must be 16-byte aligned. In 32-bit
13158 mode this means we need to take into account the offset
13159 to the parameter save area. In 64-bit mode, they just
13160 have to start on an even word, since the parameter save
13161 area is 16-byte aligned. */
13162 if (TARGET_32BIT)
13163 align = -(rs6000_parm_offset () + cum->words) & 3;
13164 else
13165 align = cum->words & 1;
13166 align_words = cum->words + align;
13168 /* Out of registers? Memory, then. */
13169 if (align_words >= GP_ARG_NUM_REG)
13170 return NULL_RTX;
13172 if (TARGET_32BIT && TARGET_POWERPC64)
13173 return rs6000_mixed_function_arg (mode, type, align_words);
13175 /* The vector value goes in GPRs. Only the part of the
13176 value in GPRs is reported here. */
13177 part_mode = mode;
13178 n_words = rs6000_arg_size (mode, type);
13179 if (align_words + n_words > GP_ARG_NUM_REG)
13180 /* Fortunately, there are only two possibilities, the value
13181 is either wholly in GPRs or half in GPRs and half not. */
13182 part_mode = DImode;
13184 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
13187 else if (TARGET_SPE_ABI && TARGET_SPE
13188 && (SPE_VECTOR_MODE (mode)
13189 || (TARGET_E500_DOUBLE && (mode == DFmode
13190 || mode == DCmode
13191 || mode == TFmode
13192 || mode == TCmode))))
13193 return rs6000_spe_function_arg (cum, mode, type);
13195 else if (abi == ABI_V4)
13197 if (abi_v4_pass_in_fpr (mode))
13199 /* _Decimal128 must use an even/odd register pair. This assumes
13200 that the register number is odd when fregno is odd. */
13201 if (mode == TDmode && (cum->fregno % 2) == 1)
13202 cum->fregno++;
13204 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
13205 <= FP_ARG_V4_MAX_REG)
13206 return gen_rtx_REG (mode, cum->fregno);
13207 else
13208 return NULL_RTX;
13210 else
13212 int n_words = rs6000_arg_size (mode, type);
13213 int gregno = cum->sysv_gregno;
13215 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13216 (r7,r8) or (r9,r10), as does any other 2-word item such
13217 as complex int due to a historical mistake. */
13218 if (n_words == 2)
13219 gregno += (1 - gregno) & 1;
13221 /* Multi-reg args are not split between registers and stack. */
13222 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
13223 return NULL_RTX;
13225 if (TARGET_32BIT && TARGET_POWERPC64)
13226 return rs6000_mixed_function_arg (mode, type,
13227 gregno - GP_ARG_MIN_REG);
13228 return gen_rtx_REG (mode, gregno);
13231 else
13233 int align_words = rs6000_parm_start (mode, type, cum->words);
13235 /* _Decimal128 must be passed in an even/odd float register pair.
13236 This assumes that the register number is odd when fregno is odd. */
13237 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
13238 cum->fregno++;
13240 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13242 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13243 rtx r, off;
13244 int i, k = 0;
13245 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13246 int fpr_words;
13248 /* Do we also need to pass this argument in the parameter
13249 save area? */
13250 if (type && (cum->nargs_prototype <= 0
13251 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13252 && TARGET_XL_COMPAT
13253 && align_words >= GP_ARG_NUM_REG)))
13254 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13256 /* Describe where this argument goes in the fprs. */
13257 for (i = 0; i < n_elts
13258 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
13260 /* Check if the argument is split over registers and memory.
13261 This can only ever happen for long double or _Decimal128;
13262 complex types are handled via split_complex_arg. */
13263 machine_mode fmode = elt_mode;
13264 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
13266 gcc_assert (FLOAT128_2REG_P (fmode));
13267 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
13270 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
13271 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13272 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13275 /* If there were not enough FPRs to hold the argument, the rest
13276 usually goes into memory. However, if the current position
13277 is still within the register parameter area, a portion may
13278 actually have to go into GPRs.
13280 Note that it may happen that the portion of the argument
13281 passed in the first "half" of the first GPR was already
13282 passed in the last FPR as well.
13284 For unnamed arguments, we already set up GPRs to cover the
13285 whole argument in rs6000_psave_function_arg, so there is
13286 nothing further to do at this point. */
13287 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
13288 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
13289 && cum->nargs_prototype > 0)
13291 static bool warned;
13293 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
13294 int n_words = rs6000_arg_size (mode, type);
13296 align_words += fpr_words;
13297 n_words -= fpr_words;
13301 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13302 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
13303 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13305 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13307 if (!warned && warn_psabi)
13309 warned = true;
13310 inform (input_location,
13311 "the ABI of passing homogeneous float aggregates"
13312 " has changed in GCC 5");
13316 return rs6000_finish_function_arg (mode, rvec, k);
13318 else if (align_words < GP_ARG_NUM_REG)
13320 if (TARGET_32BIT && TARGET_POWERPC64)
13321 return rs6000_mixed_function_arg (mode, type, align_words);
13323 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13325 else
13326 return NULL_RTX;
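/* Illustrative example (an assumption, simplified): for a double
   passed to an unprototyped callee under the AIX ABI, the code above
   returns both descriptions at once, e.g.

     (parallel:DF [(expr_list (reg:DF rN) (const_int 0))
                   (expr_list (reg:DF fM) (const_int 0))])

   so the caller stores the value in an FPR and in the GPR parameter
   area, whichever the unknown callee definition reads.  */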
13330 /* For an arg passed partly in registers and partly in memory, this is
13331 the number of bytes passed in registers. For args passed entirely in
13332 registers or entirely in memory, zero. When an arg is described by a
13333 PARALLEL, perhaps using more than one register type, this function
13334 returns the number of bytes used by the first element of the PARALLEL. */
13336 static int
13337 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
13338 tree type, bool named)
13340 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13341 bool passed_in_gprs = true;
13342 int ret = 0;
13343 int align_words;
13344 machine_mode elt_mode;
13345 int n_elts;
13347 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13349 if (DEFAULT_ABI == ABI_V4)
13350 return 0;
13352 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13354 /* If we are passing this arg in the fixed parameter save area (gprs or
13355 memory) as well as VRs, we do not use the partial bytes mechanism;
13356 instead, rs6000_function_arg will return a PARALLEL including a memory
13357 element as necessary. Library support functions for IEEE 128-bit are
13358 assumed to not need the value passed both in GPRs and in vector
13359 registers. */
13360 if (TARGET_64BIT && !cum->prototype
13361 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13362 return 0;
13364 /* Otherwise, we pass in VRs only. Check for partial copies. */
13365 passed_in_gprs = false;
13366 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
13367 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
13370 /* In this complicated case we just disable the partial_nregs code. */
13371 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13372 return 0;
13374 align_words = rs6000_parm_start (mode, type, cum->words);
13376 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13378 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13380 /* If we are passing this arg in the fixed parameter save area
13381 (gprs or memory) as well as FPRs, we do not use the partial
13382 bytes mechanism; instead, rs6000_function_arg will return a
13383 PARALLEL including a memory element as necessary. */
13384 if (type
13385 && (cum->nargs_prototype <= 0
13386 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13387 && TARGET_XL_COMPAT
13388 && align_words >= GP_ARG_NUM_REG)))
13389 return 0;
13391 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13392 passed_in_gprs = false;
13393 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
13395 /* Compute number of bytes / words passed in FPRs. If there
13396 is still space available in the register parameter area
13397 *after* that amount, a part of the argument will be passed
13398 in GPRs. In that case, the total amount passed in any
13399 registers is equal to the amount that would have been passed
13400 in GPRs if everything were passed there, so we fall back to
13401 the GPR code below to compute the appropriate value. */
13402 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
13403 * MIN (8, GET_MODE_SIZE (elt_mode)));
13404 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
13406 if (align_words + fpr_words < GP_ARG_NUM_REG)
13407 passed_in_gprs = true;
13408 else
13409 ret = fpr;
13413 if (passed_in_gprs
13414 && align_words < GP_ARG_NUM_REG
13415 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
13416 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
13418 if (ret != 0 && TARGET_DEBUG_ARG)
13419 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
13421 return ret;
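/* Worked example (illustrative): on 64-bit AIX with align_words == 6
   and a 4-doubleword argument passed in GPRs, only r9 and r10 remain,
   so the routine returns (8 - 6) * 8 == 16 bytes in registers; the
   remaining 16 bytes land in the parameter save area.  */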
13424 /* A C expression that indicates when an argument must be passed by
13425 reference. If nonzero for an argument, a copy of that argument is
13426 made in memory and a pointer to the argument is passed instead of
13427 the argument itself. The pointer is passed in whatever way is
13428 appropriate for passing a pointer to that type.
13430 Under V.4, aggregates and long double are passed by reference.
13432 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13433 reference unless the AltiVec vector extension ABI is in force.
13435 As an extension to all ABIs, variable sized types are passed by
13436 reference. */
13438 static bool
13439 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
13440 machine_mode mode, const_tree type,
13441 bool named ATTRIBUTE_UNUSED)
13443 if (!type)
13444 return 0;
13446 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
13447 && FLOAT128_IEEE_P (TYPE_MODE (type)))
13449 if (TARGET_DEBUG_ARG)
13450 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13451 return 1;
13454 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
13456 if (TARGET_DEBUG_ARG)
13457 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
13458 return 1;
13461 if (int_size_in_bytes (type) < 0)
13463 if (TARGET_DEBUG_ARG)
13464 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
13465 return 1;
13468 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13469 modes only exist for GCC vector types if -maltivec. */
13470 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13472 if (TARGET_DEBUG_ARG)
13473 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13474 return 1;
13477 /* Pass synthetic vectors in memory. */
13478 if (TREE_CODE (type) == VECTOR_TYPE
13479 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13481 static bool warned_for_pass_big_vectors = false;
13482 if (TARGET_DEBUG_ARG)
13483 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13484 if (!warned_for_pass_big_vectors)
13486 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13487 "non-standard ABI extension with no compatibility guarantee");
13488 warned_for_pass_big_vectors = true;
13490 return 1;
13493 return 0;
13496 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13497 already processed. Return true if the parameter must be passed
13498 (fully or partially) on the stack. */
13500 static bool
13501 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13503 machine_mode mode;
13504 int unsignedp;
13505 rtx entry_parm;
13507 /* Catch errors. */
13508 if (type == NULL || type == error_mark_node)
13509 return true;
13511 /* Handle types with no storage requirement. */
13512 if (TYPE_MODE (type) == VOIDmode)
13513 return false;
13515 /* Handle complex types: the apparently duplicate call below is deliberate, since each call advances ARGS_SO_FAR past one component. */
13516 if (TREE_CODE (type) == COMPLEX_TYPE)
13517 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13518 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13520 /* Handle transparent aggregates. */
13521 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13522 && TYPE_TRANSPARENT_AGGR (type))
13523 type = TREE_TYPE (first_field (type));
13525 /* See if this arg was passed by invisible reference. */
13526 if (pass_by_reference (get_cumulative_args (args_so_far),
13527 TYPE_MODE (type), type, true))
13528 type = build_pointer_type (type);
13530 /* Find mode as it is passed by the ABI. */
13531 unsignedp = TYPE_UNSIGNED (type);
13532 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13534 /* If we must pass in stack, we need a stack. */
13535 if (rs6000_must_pass_in_stack (mode, type))
13536 return true;
13538 /* If there is no incoming register, we need a stack. */
13539 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13540 if (entry_parm == NULL)
13541 return true;
13543 /* Likewise if we need to pass both in registers and on the stack. */
13544 if (GET_CODE (entry_parm) == PARALLEL
13545 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13546 return true;
13548 /* Also true if we're partially in registers and partially not. */
13549 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13550 return true;
13552 /* Update info on where next arg arrives in registers. */
13553 rs6000_function_arg_advance (args_so_far, mode, type, true);
13554 return false;
13557 /* Return true if FUN has no prototype, has a variable argument
13558 list, or passes any parameter in memory. */
13560 static bool
13561 rs6000_function_parms_need_stack (tree fun, bool incoming)
13563 tree fntype, result;
13564 CUMULATIVE_ARGS args_so_far_v;
13565 cumulative_args_t args_so_far;
13567 if (!fun)
13568 /* Must be a libcall, all of which only use reg parms. */
13569 return false;
13571 fntype = fun;
13572 if (!TYPE_P (fun))
13573 fntype = TREE_TYPE (fun);
13575 /* Varargs functions need the parameter save area. */
13576 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13577 return true;
13579 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13580 args_so_far = pack_cumulative_args (&args_so_far_v);
13582 /* When incoming, we will have been passed the function decl.
13583 It is necessary to use the decl to handle K&R style functions,
13584 where TYPE_ARG_TYPES may not be available. */
13585 if (incoming)
13587 gcc_assert (DECL_P (fun));
13588 result = DECL_RESULT (fun);
13590 else
13591 result = TREE_TYPE (fntype);
13593 if (result && aggregate_value_p (result, fntype))
13595 if (!TYPE_P (result))
13596 result = TREE_TYPE (result);
13597 result = build_pointer_type (result);
13598 rs6000_parm_needs_stack (args_so_far, result);
13601 if (incoming)
13603 tree parm;
13605 for (parm = DECL_ARGUMENTS (fun);
13606 parm && parm != void_list_node;
13607 parm = TREE_CHAIN (parm))
13608 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13609 return true;
13611 else
13613 function_args_iterator args_iter;
13614 tree arg_type;
13616 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13617 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13618 return true;
13621 return false;
13624 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13625 usually a constant depending on the ABI. However, in the ELFv2 ABI
13626 the register parameter area is optional when calling a function that
13627 has a prototype in scope, has no variable argument list, and passes
13628 all parameters in registers. */
13630 static int
13631 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13633 int reg_parm_stack_space;
13635 switch (DEFAULT_ABI)
13637 default:
13638 reg_parm_stack_space = 0;
13639 break;
13641 case ABI_AIX:
13642 case ABI_DARWIN:
13643 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13644 break;
13646 case ABI_ELFv2:
13647 /* ??? Recomputing this every time is a bit expensive. Is there
13648 a place to cache this information? */
13649 if (rs6000_function_parms_need_stack (fun, incoming))
13650 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13651 else
13652 reg_parm_stack_space = 0;
13653 break;
13656 return reg_parm_stack_space;
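/* Usage note (illustrative): under ELFv2, a call through
   "int f (int, int);" passes everything in registers, so the routine
   above returns 0 and the caller need not allocate a parameter save
   area; the same call through an unprototyped or variadic declaration
   reserves the full 64 bytes.  */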
13659 static void
13660 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13662 int i;
13663 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13665 if (nregs == 0)
13666 return;
13668 for (i = 0; i < nregs; i++)
13670 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13671 if (reload_completed)
13673 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13674 tem = NULL_RTX;
13675 else
13676 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13677 i * GET_MODE_SIZE (reg_mode));
13679 else
13680 tem = replace_equiv_address (tem, XEXP (tem, 0));
13682 gcc_assert (tem);
13684 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13688 /* Perform any actions needed for a function that is receiving a
13689 variable number of arguments.
13691 CUM is as above.
13693 MODE and TYPE are the mode and type of the current parameter.
13695 PRETEND_SIZE is a variable that should be set to the amount of stack
13696 that must be pushed by the prolog to pretend that our caller pushed it.
13699 Normally, this macro will push all remaining incoming registers on the
13700 stack and set PRETEND_SIZE to the length of the registers pushed. */
13702 static void
13703 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13704 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13705 int no_rtl)
13707 CUMULATIVE_ARGS next_cum;
13708 int reg_size = TARGET_32BIT ? 4 : 8;
13709 rtx save_area = NULL_RTX, mem;
13710 int first_reg_offset;
13711 alias_set_type set;
13713 /* Skip the last named argument. */
13714 next_cum = *get_cumulative_args (cum);
13715 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13717 if (DEFAULT_ABI == ABI_V4)
13719 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13721 if (! no_rtl)
13723 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13724 HOST_WIDE_INT offset = 0;
13726 /* Try to optimize the size of the varargs save area.
13727 The ABI requires that ap.reg_save_area is doubleword
13728 aligned, but we don't need to allocate space for all
13729 the bytes, only those into which we will actually save
13730 anything. */
13731 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13732 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13733 if (TARGET_HARD_FLOAT && TARGET_FPRS
13734 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13735 && cfun->va_list_fpr_size)
13737 if (gpr_reg_num)
13738 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13739 * UNITS_PER_FP_WORD;
13740 if (cfun->va_list_fpr_size
13741 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13742 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13743 else
13744 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13745 * UNITS_PER_FP_WORD;
13747 if (gpr_reg_num)
13749 offset = -((first_reg_offset * reg_size) & ~7);
13750 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13752 gpr_reg_num = cfun->va_list_gpr_size;
13753 if (reg_size == 4 && (first_reg_offset & 1))
13754 gpr_reg_num++;
13756 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13758 else if (fpr_size)
13759 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13760 * UNITS_PER_FP_WORD
13761 - (int) (GP_ARG_NUM_REG * reg_size);
13763 if (gpr_size + fpr_size)
13765 rtx reg_save_area
13766 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13767 gcc_assert (GET_CODE (reg_save_area) == MEM);
13768 reg_save_area = XEXP (reg_save_area, 0);
13769 if (GET_CODE (reg_save_area) == PLUS)
13771 gcc_assert (XEXP (reg_save_area, 0)
13772 == virtual_stack_vars_rtx);
13773 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13774 offset += INTVAL (XEXP (reg_save_area, 1));
13776 else
13777 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13780 cfun->machine->varargs_save_offset = offset;
13781 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13784 else
13786 first_reg_offset = next_cum.words;
13787 save_area = crtl->args.internal_arg_pointer;
13789 if (targetm.calls.must_pass_in_stack (mode, type))
13790 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13793 set = get_varargs_alias_set ();
13794 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13795 && cfun->va_list_gpr_size)
13797 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13799 if (va_list_gpr_counter_field)
13800 /* V4 va_list_gpr_size counts number of registers needed. */
13801 n_gpr = cfun->va_list_gpr_size;
13802 else
13803 /* char * va_list instead counts number of bytes needed. */
13804 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13806 if (nregs > n_gpr)
13807 nregs = n_gpr;
13809 mem = gen_rtx_MEM (BLKmode,
13810 plus_constant (Pmode, save_area,
13811 first_reg_offset * reg_size));
13812 MEM_NOTRAP_P (mem) = 1;
13813 set_mem_alias_set (mem, set);
13814 set_mem_align (mem, BITS_PER_WORD);
13816 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13817 nregs);
13820 /* Save FP registers if needed. */
13821 if (DEFAULT_ABI == ABI_V4
13822 && TARGET_HARD_FLOAT && TARGET_FPRS
13823 && ! no_rtl
13824 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13825 && cfun->va_list_fpr_size)
13827 int fregno = next_cum.fregno, nregs;
13828 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13829 rtx lab = gen_label_rtx ();
13830 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13831 * UNITS_PER_FP_WORD);
13833 emit_jump_insn
13834 (gen_rtx_SET (pc_rtx,
13835 gen_rtx_IF_THEN_ELSE (VOIDmode,
13836 gen_rtx_NE (VOIDmode, cr1,
13837 const0_rtx),
13838 gen_rtx_LABEL_REF (VOIDmode, lab),
13839 pc_rtx)));
13841 for (nregs = 0;
13842 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13843 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13845 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13846 ? DFmode : SFmode,
13847 plus_constant (Pmode, save_area, off));
13848 MEM_NOTRAP_P (mem) = 1;
13849 set_mem_alias_set (mem, set);
13850 set_mem_align (mem, GET_MODE_ALIGNMENT (
13851 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13852 ? DFmode : SFmode));
13853 emit_move_insn (mem, gen_rtx_REG (
13854 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13855 ? DFmode : SFmode, fregno));
13858 emit_label (lab);
13862 /* Create the va_list data type. */
13864 static tree
13865 rs6000_build_builtin_va_list (void)
13867 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13869 /* For AIX, prefer 'char *' because that's what the system
13870 header files like. */
13871 if (DEFAULT_ABI != ABI_V4)
13872 return build_pointer_type (char_type_node);
13874 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13875 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13876 get_identifier ("__va_list_tag"), record);
13878 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13879 unsigned_char_type_node);
13880 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13881 unsigned_char_type_node);
13882 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13883 every user file. */
13884 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13885 get_identifier ("reserved"), short_unsigned_type_node);
13886 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13887 get_identifier ("overflow_arg_area"),
13888 ptr_type_node);
13889 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13890 get_identifier ("reg_save_area"),
13891 ptr_type_node);
13893 va_list_gpr_counter_field = f_gpr;
13894 va_list_fpr_counter_field = f_fpr;
13896 DECL_FIELD_CONTEXT (f_gpr) = record;
13897 DECL_FIELD_CONTEXT (f_fpr) = record;
13898 DECL_FIELD_CONTEXT (f_res) = record;
13899 DECL_FIELD_CONTEXT (f_ovf) = record;
13900 DECL_FIELD_CONTEXT (f_sav) = record;
13902 TYPE_STUB_DECL (record) = type_decl;
13903 TYPE_NAME (record) = type_decl;
13904 TYPE_FIELDS (record) = f_gpr;
13905 DECL_CHAIN (f_gpr) = f_fpr;
13906 DECL_CHAIN (f_fpr) = f_res;
13907 DECL_CHAIN (f_res) = f_ovf;
13908 DECL_CHAIN (f_ovf) = f_sav;
13910 layout_type (record);
13912 /* The correct type is an array type of one element. */
13913 return build_array_type (record, build_index_type (size_zero_node));
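/* Illustrative equivalent (the tag and field comments are assumptions;
   kept under #if 0 so it is never compiled): the record built above
   matches the SVR4 va_list layout below.  */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;		/* next GPR slot to use, 0..8 */
  unsigned char fpr;		/* next FPR slot to use, 0..8 */
  unsigned short reserved;	/* the named padding */
  void *overflow_arg_area;	/* args that overflowed to the stack */
  void *reg_save_area;		/* block where the prologue saved arg regs */
} example_va_list[1];		/* an array of one element, as returned */
#endif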
13916 /* Implement va_start. */
13918 static void
13919 rs6000_va_start (tree valist, rtx nextarg)
13921 HOST_WIDE_INT words, n_gpr, n_fpr;
13922 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13923 tree gpr, fpr, ovf, sav, t;
13925 /* Only SVR4 needs something special. */
13926 if (DEFAULT_ABI != ABI_V4)
13928 std_expand_builtin_va_start (valist, nextarg);
13929 return;
13932 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13933 f_fpr = DECL_CHAIN (f_gpr);
13934 f_res = DECL_CHAIN (f_fpr);
13935 f_ovf = DECL_CHAIN (f_res);
13936 f_sav = DECL_CHAIN (f_ovf);
13938 valist = build_simple_mem_ref (valist);
13939 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13940 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13941 f_fpr, NULL_TREE);
13942 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13943 f_ovf, NULL_TREE);
13944 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13945 f_sav, NULL_TREE);
13947 /* Count number of gp and fp argument registers used. */
13948 words = crtl->args.info.words;
13949 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13950 GP_ARG_NUM_REG);
13951 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13952 FP_ARG_NUM_REG);
13954 if (TARGET_DEBUG_ARG)
13955 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13956 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13957 words, n_gpr, n_fpr);
13959 if (cfun->va_list_gpr_size)
13961 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13962 build_int_cst (NULL_TREE, n_gpr));
13963 TREE_SIDE_EFFECTS (t) = 1;
13964 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13967 if (cfun->va_list_fpr_size)
13969 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13970 build_int_cst (NULL_TREE, n_fpr));
13971 TREE_SIDE_EFFECTS (t) = 1;
13972 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13974 #ifdef HAVE_AS_GNU_ATTRIBUTE
13975 if (call_ABI_of_interest (cfun->decl))
13976 rs6000_passes_float = true;
13977 #endif
13980 /* Find the overflow area. */
13981 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13982 if (words != 0)
13983 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13984 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13985 TREE_SIDE_EFFECTS (t) = 1;
13986 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13988 /* If there were no va_arg invocations, don't set up the register
13989 save area. */
13990 if (!cfun->va_list_gpr_size
13991 && !cfun->va_list_fpr_size
13992 && n_gpr < GP_ARG_NUM_REG
13993 && n_fpr < FP_ARG_V4_MAX_REG)
13994 return;
13996 /* Find the register save area. */
13997 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13998 if (cfun->machine->varargs_save_offset)
13999 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
14000 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
14001 TREE_SIDE_EFFECTS (t) = 1;
14002 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
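/* Illustrative summary (assumption: two named integer args and one
   named double precede the ellipsis): after the expansions above,

     ap->gpr = 2;                       // r3/r4 already consumed
     ap->fpr = 1;                       // f1 already consumed
     ap->overflow_arg_area = arg_ptr;   // plus words * 4 if words != 0
     ap->reg_save_area = frame + cfun->machine->varargs_save_offset;

   so the first integer va_arg pulls from the slot saved for r5.  */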
14005 /* Implement va_arg. */
14007 static tree
14008 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
14009 gimple_seq *post_p)
14011 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
14012 tree gpr, fpr, ovf, sav, reg, t, u;
14013 int size, rsize, n_reg, sav_ofs, sav_scale;
14014 tree lab_false, lab_over, addr;
14015 int align;
14016 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
14017 int regalign = 0;
14018 gimple *stmt;
14020 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
14022 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
14023 return build_va_arg_indirect_ref (t);
14026 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
14027 earlier version of gcc, with the property that it always applied alignment
14028 adjustments to the va-args (even for zero-sized types). The cheapest way
14029 to deal with this is to replicate the effect of the part of
14030 std_gimplify_va_arg_expr that carries out the align adjust, for the
14031 relevant case.
14032 We don't need to check for pass-by-reference because of the test above.
14033 We can return a simplified answer, since we know there's no offset to add. */
14035 if (((TARGET_MACHO
14036 && rs6000_darwin64_abi)
14037 || DEFAULT_ABI == ABI_ELFv2
14038 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
14039 && integer_zerop (TYPE_SIZE (type)))
14041 unsigned HOST_WIDE_INT align, boundary;
14042 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
14043 align = PARM_BOUNDARY / BITS_PER_UNIT;
14044 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
14045 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
14046 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
14047 boundary /= BITS_PER_UNIT;
14048 if (boundary > align)
14050 tree t;
14051 /* This updates arg ptr by the amount that would be necessary
14052 to align the zero-sized (but not zero-alignment) item. */
14053 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14054 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
14055 gimplify_and_add (t, pre_p);
14057 t = fold_convert (sizetype, valist_tmp);
14058 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14059 fold_convert (TREE_TYPE (valist),
14060 fold_build2 (BIT_AND_EXPR, sizetype, t,
14061 size_int (-boundary))));
14062 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
14063 gimplify_and_add (t, pre_p);
14065 /* Since it is zero-sized there's no increment for the item itself. */
14066 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
14067 return build_va_arg_indirect_ref (valist_tmp);
14070 if (DEFAULT_ABI != ABI_V4)
14072 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
14074 tree elem_type = TREE_TYPE (type);
14075 machine_mode elem_mode = TYPE_MODE (elem_type);
14076 int elem_size = GET_MODE_SIZE (elem_mode);
14078 if (elem_size < UNITS_PER_WORD)
14080 tree real_part, imag_part;
14081 gimple_seq post = NULL;
14083 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14084 &post);
14085 /* Copy the value into a temporary, lest the formal temporary
14086 be reused out from under us. */
14087 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
14088 gimple_seq_add_seq (pre_p, post);
14090 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14091 post_p);
14093 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
14097 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
14100 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
14101 f_fpr = DECL_CHAIN (f_gpr);
14102 f_res = DECL_CHAIN (f_fpr);
14103 f_ovf = DECL_CHAIN (f_res);
14104 f_sav = DECL_CHAIN (f_ovf);
14106 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
14107 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
14108 f_fpr, NULL_TREE);
14109 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
14110 f_ovf, NULL_TREE);
14111 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
14112 f_sav, NULL_TREE);
14114 size = int_size_in_bytes (type);
14115 rsize = (size + 3) / 4;
14116 int pad = 4 * rsize - size;
14117 align = 1;
14119 machine_mode mode = TYPE_MODE (type);
14120 if (abi_v4_pass_in_fpr (mode))
14122 /* FP args go in FP registers, if present. */
14123 reg = fpr;
14124 n_reg = (size + 7) / 8;
14125 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
14126 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
14127 if (mode != SFmode && mode != SDmode)
14128 align = 8;
14130 else
14132 /* Otherwise into GP registers. */
14133 reg = gpr;
14134 n_reg = rsize;
14135 sav_ofs = 0;
14136 sav_scale = 4;
14137 if (n_reg == 2)
14138 align = 8;
14141 /* Pull the value out of the saved registers.... */
14143 lab_over = NULL;
14144 addr = create_tmp_var (ptr_type_node, "addr");
14146 /* AltiVec vectors never go in registers when -mabi=altivec. */
14147 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
14148 align = 16;
14149 else
14151 lab_false = create_artificial_label (input_location);
14152 lab_over = create_artificial_label (input_location);
14154 /* Long long and SPE vectors are aligned in the registers, as is
14155 any other 2-GPR item such as complex int, due to a
14156 historical mistake. */
14157 u = reg;
14158 if (n_reg == 2 && reg == gpr)
14160 regalign = 1;
14161 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14162 build_int_cst (TREE_TYPE (reg), n_reg - 1));
14163 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
14164 unshare_expr (reg), u);
14166 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14167 reg number is 0 for f1, so we want to make it odd. */
14168 else if (reg == fpr && mode == TDmode)
14170 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14171 build_int_cst (TREE_TYPE (reg), 1));
14172 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
14175 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
14176 t = build2 (GE_EXPR, boolean_type_node, u, t);
14177 u = build1 (GOTO_EXPR, void_type_node, lab_false);
14178 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
14179 gimplify_and_add (t, pre_p);
14181 t = sav;
14182 if (sav_ofs)
14183 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
14185 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14186 build_int_cst (TREE_TYPE (reg), n_reg));
14187 u = fold_convert (sizetype, u);
14188 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
14189 t = fold_build_pointer_plus (t, u);
14191 /* _Decimal32 varargs are located in the second word of the 64-bit
14192 FP register for 32-bit binaries. */
14193 if (TARGET_32BIT
14194 && TARGET_HARD_FLOAT && TARGET_FPRS
14195 && mode == SDmode)
14196 t = fold_build_pointer_plus_hwi (t, size);
14198 /* Args are passed right-aligned. */
14199 if (BYTES_BIG_ENDIAN)
14200 t = fold_build_pointer_plus_hwi (t, pad);
14202 gimplify_assign (addr, t, pre_p);
14204 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
14206 stmt = gimple_build_label (lab_false);
14207 gimple_seq_add_stmt (pre_p, stmt);
14209 if ((n_reg == 2 && !regalign) || n_reg > 2)
14211 /* Ensure that we don't find any more args in regs.
14212 Alignment has already taken care of the special cases. */
14213 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
14217 /* ... otherwise out of the overflow area. */
14219 /* Care for on-stack alignment if needed. */
14220 t = ovf;
14221 if (align != 1)
14223 t = fold_build_pointer_plus_hwi (t, align - 1);
14224 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14225 build_int_cst (TREE_TYPE (t), -align));
14228 /* Args are passed right-aligned. */
14229 if (BYTES_BIG_ENDIAN)
14230 t = fold_build_pointer_plus_hwi (t, pad);
14232 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
14234 gimplify_assign (unshare_expr (addr), t, pre_p);
14236 t = fold_build_pointer_plus_hwi (t, size);
14237 gimplify_assign (unshare_expr (ovf), t, pre_p);
14239 if (lab_over)
14241 stmt = gimple_build_label (lab_over);
14242 gimple_seq_add_stmt (pre_p, stmt);
14245 if (STRICT_ALIGNMENT
14246 && (TYPE_ALIGN (type)
14247 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14249 /* The value (of type complex double, for example) may not be
14250 aligned in memory in the saved registers, so copy via a
14251 temporary. (This is the same code as used for SPARC.) */
14252 tree tmp = create_tmp_var (type, "va_arg_tmp");
14253 tree dest_addr = build_fold_addr_expr (tmp);
14255 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14256 3, dest_addr, addr, size_int (rsize * 4));
14257 TREE_ADDRESSABLE (tmp) = 1;
14259 gimplify_and_add (copy, pre_p);
14260 addr = dest_addr;
14263 addr = fold_convert (ptrtype, addr);
14264 return build_va_arg_indirect_ref (addr);
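/* Illustrative expansion (assumption: V4, 32-bit hard float, type
   double, hence n_reg == 1, sav_ofs == 32, sav_scale == 8, align == 8):

     if (ap->fpr >= 8) goto lab_false;
     addr = ap->reg_save_area + 32 + ap->fpr++ * 8;
     goto lab_over;
   lab_false:
     ap->overflow_arg_area = (ap->overflow_arg_area + 7) & -8;
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   lab_over:
     result = *(double *) addr;  */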
14267 /* Builtins. */
14269 static void
14270 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14272 tree t;
14273 unsigned classify = rs6000_builtin_info[(int)code].attr;
14274 const char *attr_string = "";
14276 gcc_assert (name != NULL);
14277 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14279 if (rs6000_builtin_decls[(int)code])
14280 fatal_error (input_location,
14281 "internal error: builtin function %s already processed", name);
14283 rs6000_builtin_decls[(int)code] = t =
14284 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14286 /* Set any special attributes. */
14287 if ((classify & RS6000_BTC_CONST) != 0)
14289 /* const function, function only depends on the inputs. */
14290 TREE_READONLY (t) = 1;
14291 TREE_NOTHROW (t) = 1;
14292 attr_string = ", const";
14294 else if ((classify & RS6000_BTC_PURE) != 0)
14296 /* pure function, function can read global memory, but does not set any
14297 external state. */
14298 DECL_PURE_P (t) = 1;
14299 TREE_NOTHROW (t) = 1;
14300 attr_string = ", pure";
14302 else if ((classify & RS6000_BTC_FP) != 0)
14304 /* Function is a math function. If rounding mode is on, then treat the
14305 function as not reading global memory, but it can have arbitrary side
14306 effects. If it is off, then assume the function is a const function.
14307 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14308 builtin-attribute.def that is used for the math functions. */
14309 TREE_NOTHROW (t) = 1;
14310 if (flag_rounding_math)
14312 DECL_PURE_P (t) = 1;
14313 DECL_IS_NOVOPS (t) = 1;
14314 attr_string = ", fp, pure";
14316 else
14318 TREE_READONLY (t) = 1;
14319 attr_string = ", fp, const";
14322 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14323 gcc_unreachable ();
14325 if (TARGET_DEBUG_BUILTIN)
14326 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14327 (int)code, name, attr_string);
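/* Usage sketch (illustrative; the function-type node name is an
   assumption):

     def_builtin ("__builtin_altivec_vmaddfp",
		  v4sf_ftype_v4sf_v4sf_v4sf, ALTIVEC_BUILTIN_VMADDFP);

   registers the builtin under BUILT_IN_MD, records the decl in
   rs6000_builtin_decls, and sets const/pure/fp attributes from the
   RS6000_BTC_* bits in rs6000_builtin_info.  */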
14330 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14332 #undef RS6000_BUILTIN_0
14333 #undef RS6000_BUILTIN_1
14334 #undef RS6000_BUILTIN_2
14335 #undef RS6000_BUILTIN_3
14336 #undef RS6000_BUILTIN_A
14337 #undef RS6000_BUILTIN_D
14338 #undef RS6000_BUILTIN_E
14339 #undef RS6000_BUILTIN_H
14340 #undef RS6000_BUILTIN_P
14341 #undef RS6000_BUILTIN_Q
14342 #undef RS6000_BUILTIN_S
14343 #undef RS6000_BUILTIN_X
14345 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14346 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14347 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14349 { MASK, ICODE, NAME, ENUM },
14351 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14352 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14353 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14354 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14355 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14356 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14357 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14358 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14360 static const struct builtin_description bdesc_3arg[] =
14362 #include "powerpcspe-builtin.def"
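/* How the X-macro scheme works (the entry shown is an illustrative
   assumption about powerpcspe-builtin.def): the .def file is a list of
   invocations such as

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
		       RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)

   Since only RS6000_BUILTIN_3 expands to "{ MASK, ICODE, NAME, ENUM },"
   above, including the file fills bdesc_3arg with exactly the ternary
   builtins; each subsequent table redefines the macros to select a
   different class.  */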
14365 /* DST operations: void foo (void *, const int, const char). */
14367 #undef RS6000_BUILTIN_0
14368 #undef RS6000_BUILTIN_1
14369 #undef RS6000_BUILTIN_2
14370 #undef RS6000_BUILTIN_3
14371 #undef RS6000_BUILTIN_A
14372 #undef RS6000_BUILTIN_D
14373 #undef RS6000_BUILTIN_E
14374 #undef RS6000_BUILTIN_H
14375 #undef RS6000_BUILTIN_P
14376 #undef RS6000_BUILTIN_Q
14377 #undef RS6000_BUILTIN_S
14378 #undef RS6000_BUILTIN_X
14380 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14382 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14383 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14384 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14385 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14386 { MASK, ICODE, NAME, ENUM },
14388 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14389 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14390 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14391 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14392 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14393 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14395 static const struct builtin_description bdesc_dst[] =
14397 #include "powerpcspe-builtin.def"
14400 /* Simple binary operations: VECc = foo (VECa, VECb). */
14402 #undef RS6000_BUILTIN_0
14403 #undef RS6000_BUILTIN_1
14404 #undef RS6000_BUILTIN_2
14405 #undef RS6000_BUILTIN_3
14406 #undef RS6000_BUILTIN_A
14407 #undef RS6000_BUILTIN_D
14408 #undef RS6000_BUILTIN_E
14409 #undef RS6000_BUILTIN_H
14410 #undef RS6000_BUILTIN_P
14411 #undef RS6000_BUILTIN_Q
14412 #undef RS6000_BUILTIN_S
14413 #undef RS6000_BUILTIN_X
14415 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14416 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14417 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14418 { MASK, ICODE, NAME, ENUM },
14420 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14421 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14422 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14423 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14424 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14425 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14426 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14427 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14428 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14430 static const struct builtin_description bdesc_2arg[] =
14432 #include "powerpcspe-builtin.def"
14435 #undef RS6000_BUILTIN_0
14436 #undef RS6000_BUILTIN_1
14437 #undef RS6000_BUILTIN_2
14438 #undef RS6000_BUILTIN_3
14439 #undef RS6000_BUILTIN_A
14440 #undef RS6000_BUILTIN_D
14441 #undef RS6000_BUILTIN_E
14442 #undef RS6000_BUILTIN_H
14443 #undef RS6000_BUILTIN_P
14444 #undef RS6000_BUILTIN_Q
14445 #undef RS6000_BUILTIN_S
14446 #undef RS6000_BUILTIN_X
14448 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14450 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14451 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14452 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14453 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14454 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14455 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14456 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14457 { MASK, ICODE, NAME, ENUM },
14459 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14460 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14461 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14463 /* AltiVec predicates. */
14465 static const struct builtin_description bdesc_altivec_preds[] =
14467 #include "powerpcspe-builtin.def"
14470 /* SPE predicates. */
14471 #undef RS6000_BUILTIN_0
14472 #undef RS6000_BUILTIN_1
14473 #undef RS6000_BUILTIN_2
14474 #undef RS6000_BUILTIN_3
14475 #undef RS6000_BUILTIN_A
14476 #undef RS6000_BUILTIN_D
14477 #undef RS6000_BUILTIN_E
14478 #undef RS6000_BUILTIN_H
14479 #undef RS6000_BUILTIN_P
14480 #undef RS6000_BUILTIN_Q
14481 #undef RS6000_BUILTIN_S
14482 #undef RS6000_BUILTIN_X
14484 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14485 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14486 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14487 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14488 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14489 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14490 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14491 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14492 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14493 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14494 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14495 { MASK, ICODE, NAME, ENUM },
14497 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14499 static const struct builtin_description bdesc_spe_predicates[] =
14501 #include "powerpcspe-builtin.def"
14504 /* SPE evsel predicates. */
14505 #undef RS6000_BUILTIN_0
14506 #undef RS6000_BUILTIN_1
14507 #undef RS6000_BUILTIN_2
14508 #undef RS6000_BUILTIN_3
14509 #undef RS6000_BUILTIN_A
14510 #undef RS6000_BUILTIN_D
14511 #undef RS6000_BUILTIN_E
14512 #undef RS6000_BUILTIN_H
14513 #undef RS6000_BUILTIN_P
14514 #undef RS6000_BUILTIN_Q
14515 #undef RS6000_BUILTIN_S
14516 #undef RS6000_BUILTIN_X
14518 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14521 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14522 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14523 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14524 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14525 { MASK, ICODE, NAME, ENUM },
14527 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14528 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14529 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14530 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14531 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14533 static const struct builtin_description bdesc_spe_evsel[] =
14535 #include "powerpcspe-builtin.def"
14538 /* PAIRED predicates. */
14539 #undef RS6000_BUILTIN_0
14540 #undef RS6000_BUILTIN_1
14541 #undef RS6000_BUILTIN_2
14542 #undef RS6000_BUILTIN_3
14543 #undef RS6000_BUILTIN_A
14544 #undef RS6000_BUILTIN_D
14545 #undef RS6000_BUILTIN_E
14546 #undef RS6000_BUILTIN_H
14547 #undef RS6000_BUILTIN_P
14548 #undef RS6000_BUILTIN_Q
14549 #undef RS6000_BUILTIN_S
14550 #undef RS6000_BUILTIN_X
14552 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14554 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14556 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14557 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14558 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14559 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14560 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14561 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14562 { MASK, ICODE, NAME, ENUM },
14564 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14565 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14567 static const struct builtin_description bdesc_paired_preds[] =
14569 #include "powerpcspe-builtin.def"
14572 /* ABS* operations. */
14574 #undef RS6000_BUILTIN_0
14575 #undef RS6000_BUILTIN_1
14576 #undef RS6000_BUILTIN_2
14577 #undef RS6000_BUILTIN_3
14578 #undef RS6000_BUILTIN_A
14579 #undef RS6000_BUILTIN_D
14580 #undef RS6000_BUILTIN_E
14581 #undef RS6000_BUILTIN_H
14582 #undef RS6000_BUILTIN_P
14583 #undef RS6000_BUILTIN_Q
14584 #undef RS6000_BUILTIN_S
14585 #undef RS6000_BUILTIN_X
14587 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14590 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14591 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14592 { MASK, ICODE, NAME, ENUM },
14594 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14595 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14596 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14597 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14598 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14599 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14600 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14602 static const struct builtin_description bdesc_abs[] =
14604 #include "powerpcspe-builtin.def"
14607 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14608 foo (VECa). */
14610 #undef RS6000_BUILTIN_0
14611 #undef RS6000_BUILTIN_1
14612 #undef RS6000_BUILTIN_2
14613 #undef RS6000_BUILTIN_3
14614 #undef RS6000_BUILTIN_A
14615 #undef RS6000_BUILTIN_D
14616 #undef RS6000_BUILTIN_E
14617 #undef RS6000_BUILTIN_H
14618 #undef RS6000_BUILTIN_P
14619 #undef RS6000_BUILTIN_Q
14620 #undef RS6000_BUILTIN_S
14621 #undef RS6000_BUILTIN_X
14623 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14624 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14625 { MASK, ICODE, NAME, ENUM },
14627 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14628 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14629 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14630 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14631 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14632 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14633 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14634 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14635 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14636 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14638 static const struct builtin_description bdesc_1arg[] =
14640 #include "powerpcspe-builtin.def"
14643 /* Simple no-argument operations: result = __builtin_darn_32 () */
14645 #undef RS6000_BUILTIN_0
14646 #undef RS6000_BUILTIN_1
14647 #undef RS6000_BUILTIN_2
14648 #undef RS6000_BUILTIN_3
14649 #undef RS6000_BUILTIN_A
14650 #undef RS6000_BUILTIN_D
14651 #undef RS6000_BUILTIN_E
14652 #undef RS6000_BUILTIN_H
14653 #undef RS6000_BUILTIN_P
14654 #undef RS6000_BUILTIN_Q
14655 #undef RS6000_BUILTIN_S
14656 #undef RS6000_BUILTIN_X
14658 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14659 { MASK, ICODE, NAME, ENUM },
14661 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14662 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14663 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14664 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14665 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14666 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14667 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14668 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14669 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14670 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14671 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14673 static const struct builtin_description bdesc_0arg[] =
14675 #include "powerpcspe-builtin.def"
14678 /* HTM builtins. */
14679 #undef RS6000_BUILTIN_0
14680 #undef RS6000_BUILTIN_1
14681 #undef RS6000_BUILTIN_2
14682 #undef RS6000_BUILTIN_3
14683 #undef RS6000_BUILTIN_A
14684 #undef RS6000_BUILTIN_D
14685 #undef RS6000_BUILTIN_E
14686 #undef RS6000_BUILTIN_H
14687 #undef RS6000_BUILTIN_P
14688 #undef RS6000_BUILTIN_Q
14689 #undef RS6000_BUILTIN_S
14690 #undef RS6000_BUILTIN_X
14692 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14693 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14694 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14695 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14696 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14697 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14698 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14699 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14700 { MASK, ICODE, NAME, ENUM },
14702 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14703 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14704 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14705 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14707 static const struct builtin_description bdesc_htm[] =
14709 #include "powerpcspe-builtin.def"
14712 #undef RS6000_BUILTIN_0
14713 #undef RS6000_BUILTIN_1
14714 #undef RS6000_BUILTIN_2
14715 #undef RS6000_BUILTIN_3
14716 #undef RS6000_BUILTIN_A
14717 #undef RS6000_BUILTIN_D
14718 #undef RS6000_BUILTIN_E
14719 #undef RS6000_BUILTIN_H
14720 #undef RS6000_BUILTIN_P
14721 #undef RS6000_BUILTIN_Q
14722 #undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
14724 /* Return true if a builtin function is overloaded. */
14725 bool
14726 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14728 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14731 const char *
14732 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14734 return rs6000_builtin_info[(int)fncode].name;
14737 /* Expand an expression EXP that calls a builtin without arguments. */
14738 static rtx
14739 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14741 rtx pat;
14742 machine_mode tmode = insn_data[icode].operand[0].mode;
14744 if (icode == CODE_FOR_nothing)
14745 /* Builtin not supported on this processor. */
14746 return 0;
14748 if (target == 0
14749 || GET_MODE (target) != tmode
14750 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14751 target = gen_reg_rtx (tmode);
14753 pat = GEN_FCN (icode) (target);
14754 if (! pat)
14755 return 0;
14756 emit_insn (pat);
14758 return target;
14762 static rtx
14763 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14765 rtx pat;
14766 tree arg0 = CALL_EXPR_ARG (exp, 0);
14767 tree arg1 = CALL_EXPR_ARG (exp, 1);
14768 rtx op0 = expand_normal (arg0);
14769 rtx op1 = expand_normal (arg1);
14770 machine_mode mode0 = insn_data[icode].operand[0].mode;
14771 machine_mode mode1 = insn_data[icode].operand[1].mode;
14773 if (icode == CODE_FOR_nothing)
14774 /* Builtin not supported on this processor. */
14775 return 0;
14777 /* If we got invalid arguments, bail out before generating bad rtl. */
14778 if (arg0 == error_mark_node || arg1 == error_mark_node)
14779 return const0_rtx;
14781 if (GET_CODE (op0) != CONST_INT
14782 || INTVAL (op0) > 255
14783 || INTVAL (op0) < 0)
14785 error ("argument 1 must be an 8-bit field value");
14786 return const0_rtx;
14789 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14790 op0 = copy_to_mode_reg (mode0, op0);
14792 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14793 op1 = copy_to_mode_reg (mode1, op1);
14795 pat = GEN_FCN (icode) (op0, op1);
14796 if (! pat)
14797 return const0_rtx;
14798 emit_insn (pat);
14800 return NULL_RTX;
14803 static rtx
14804 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14806 rtx pat;
14807 tree arg0 = CALL_EXPR_ARG (exp, 0);
14808 rtx op0 = expand_normal (arg0);
14809 machine_mode tmode = insn_data[icode].operand[0].mode;
14810 machine_mode mode0 = insn_data[icode].operand[1].mode;
14812 if (icode == CODE_FOR_nothing)
14813 /* Builtin not supported on this processor. */
14814 return 0;
14816 /* If we got invalid arguments, bail out before generating bad rtl. */
14817 if (arg0 == error_mark_node)
14818 return const0_rtx;
14820 if (icode == CODE_FOR_altivec_vspltisb
14821 || icode == CODE_FOR_altivec_vspltish
14822 || icode == CODE_FOR_altivec_vspltisw
14823 || icode == CODE_FOR_spe_evsplatfi
14824 || icode == CODE_FOR_spe_evsplati)
14826 /* Only allow 5-bit *signed* literals. */
14827 if (GET_CODE (op0) != CONST_INT
14828 || INTVAL (op0) > 15
14829 || INTVAL (op0) < -16)
14831 error ("argument 1 must be a 5-bit signed literal");
14832 return CONST0_RTX (tmode);
14836 if (target == 0
14837 || GET_MODE (target) != tmode
14838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14839 target = gen_reg_rtx (tmode);
14841 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14842 op0 = copy_to_mode_reg (mode0, op0);
14844 pat = GEN_FCN (icode) (target, op0);
14845 if (! pat)
14846 return 0;
14847 emit_insn (pat);
14849 return target;
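/* Source-level illustration of the 5-bit *signed* literal check above
   (a sketch, assuming the usual altivec.h mapping of vec_splat_s8 to
   __builtin_altivec_vspltisb):

     vector signed char a = vec_splat_s8 (-16);   -- accepted, -16..15
     vector signed char b = vec_splat_s8 (16);    -- rejected with
          "argument 1 must be a 5-bit signed literal"

   because vspltisb encodes the value in an immediate field.  */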
14852 static rtx
14853 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14855 rtx pat, scratch1, scratch2;
14856 tree arg0 = CALL_EXPR_ARG (exp, 0);
14857 rtx op0 = expand_normal (arg0);
14858 machine_mode tmode = insn_data[icode].operand[0].mode;
14859 machine_mode mode0 = insn_data[icode].operand[1].mode;
14861 /* If we have invalid arguments, bail out before generating bad rtl. */
14862 if (arg0 == error_mark_node)
14863 return const0_rtx;
14865 if (target == 0
14866 || GET_MODE (target) != tmode
14867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14868 target = gen_reg_rtx (tmode);
14870 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14871 op0 = copy_to_mode_reg (mode0, op0);
14873 scratch1 = gen_reg_rtx (mode0);
14874 scratch2 = gen_reg_rtx (mode0);
14876 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14877 if (! pat)
14878 return 0;
14879 emit_insn (pat);
14881 return target;
14884 static rtx
14885 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14887 rtx pat;
14888 tree arg0 = CALL_EXPR_ARG (exp, 0);
14889 tree arg1 = CALL_EXPR_ARG (exp, 1);
14890 rtx op0 = expand_normal (arg0);
14891 rtx op1 = expand_normal (arg1);
14892 machine_mode tmode = insn_data[icode].operand[0].mode;
14893 machine_mode mode0 = insn_data[icode].operand[1].mode;
14894 machine_mode mode1 = insn_data[icode].operand[2].mode;
14896 if (icode == CODE_FOR_nothing)
14897 /* Builtin not supported on this processor. */
14898 return 0;
14900 /* If we got invalid arguments, bail out before generating bad rtl. */
14901 if (arg0 == error_mark_node || arg1 == error_mark_node)
14902 return const0_rtx;
14904 if (icode == CODE_FOR_altivec_vcfux
14905 || icode == CODE_FOR_altivec_vcfsx
14906 || icode == CODE_FOR_altivec_vctsxs
14907 || icode == CODE_FOR_altivec_vctuxs
14908 || icode == CODE_FOR_altivec_vspltb
14909 || icode == CODE_FOR_altivec_vsplth
14910 || icode == CODE_FOR_altivec_vspltw
14911 || icode == CODE_FOR_spe_evaddiw
14912 || icode == CODE_FOR_spe_evldd
14913 || icode == CODE_FOR_spe_evldh
14914 || icode == CODE_FOR_spe_evldw
14915 || icode == CODE_FOR_spe_evlhhesplat
14916 || icode == CODE_FOR_spe_evlhhossplat
14917 || icode == CODE_FOR_spe_evlhhousplat
14918 || icode == CODE_FOR_spe_evlwhe
14919 || icode == CODE_FOR_spe_evlwhos
14920 || icode == CODE_FOR_spe_evlwhou
14921 || icode == CODE_FOR_spe_evlwhsplat
14922 || icode == CODE_FOR_spe_evlwwsplat
14923 || icode == CODE_FOR_spe_evrlwi
14924 || icode == CODE_FOR_spe_evslwi
14925 || icode == CODE_FOR_spe_evsrwis
14926 || icode == CODE_FOR_spe_evsubifw
14927 || icode == CODE_FOR_spe_evsrwiu)
14929 /* Only allow 5-bit unsigned literals. */
14930 STRIP_NOPS (arg1);
14931 if (TREE_CODE (arg1) != INTEGER_CST
14932 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14934 error ("argument 2 must be a 5-bit unsigned literal");
14935 return CONST0_RTX (tmode);
14938 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14939 || icode == CODE_FOR_dfptstsfi_lt_dd
14940 || icode == CODE_FOR_dfptstsfi_gt_dd
14941 || icode == CODE_FOR_dfptstsfi_unordered_dd
14942 || icode == CODE_FOR_dfptstsfi_eq_td
14943 || icode == CODE_FOR_dfptstsfi_lt_td
14944 || icode == CODE_FOR_dfptstsfi_gt_td
14945 || icode == CODE_FOR_dfptstsfi_unordered_td)
14947 /* Only allow 6-bit unsigned literals. */
14948 STRIP_NOPS (arg0);
14949 if (TREE_CODE (arg0) != INTEGER_CST
14950 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14952 error ("argument 1 must be a 6-bit unsigned literal");
14953 return CONST0_RTX (tmode);
14956 else if (icode == CODE_FOR_xststdcdp
14957 || icode == CODE_FOR_xststdcsp
14958 || icode == CODE_FOR_xvtstdcdp
14959 || icode == CODE_FOR_xvtstdcsp)
14961 /* Only allow 7-bit unsigned literals. */
14962 STRIP_NOPS (arg1);
14963 if (TREE_CODE (arg1) != INTEGER_CST
14964 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14966 error ("argument 2 must be a 7-bit unsigned literal");
14967 return CONST0_RTX (tmode);
14971 if (target == 0
14972 || GET_MODE (target) != tmode
14973 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14974 target = gen_reg_rtx (tmode);
14976 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14977 op0 = copy_to_mode_reg (mode0, op0);
14978 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14979 op1 = copy_to_mode_reg (mode1, op1);
14981 pat = GEN_FCN (icode) (target, op0, op1);
14982 if (! pat)
14983 return 0;
14984 emit_insn (pat);
14986 return target;
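/* Source-level illustration of the 5-bit unsigned literal check above
   (a sketch; vec_splat maps to vsplt[bhw], whose element selector is
   an immediate field of the instruction):

     vector int v, w;
     w = vec_splat (v, 1);   -- accepted
     w = vec_splat (v, i);   -- rejected unless i folds to a constant:
          "argument 2 must be a 5-bit unsigned literal"  */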
14989 static rtx
14990 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14992 rtx pat, scratch;
14993 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14994 tree arg0 = CALL_EXPR_ARG (exp, 1);
14995 tree arg1 = CALL_EXPR_ARG (exp, 2);
14996 rtx op0 = expand_normal (arg0);
14997 rtx op1 = expand_normal (arg1);
14998 machine_mode tmode = SImode;
14999 machine_mode mode0 = insn_data[icode].operand[1].mode;
15000 machine_mode mode1 = insn_data[icode].operand[2].mode;
15001 int cr6_form_int;
15003 if (TREE_CODE (cr6_form) != INTEGER_CST)
15005 error ("argument 1 of __builtin_altivec_predicate must be a constant");
15006 return const0_rtx;
15008 else
15009 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
15011 gcc_assert (mode0 == mode1);
15013 /* If we have invalid arguments, bail out before generating bad rtl. */
15014 if (arg0 == error_mark_node || arg1 == error_mark_node)
15015 return const0_rtx;
15017 if (target == 0
15018 || GET_MODE (target) != tmode
15019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15020 target = gen_reg_rtx (tmode);
15022 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15023 op0 = copy_to_mode_reg (mode0, op0);
15024 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15025 op1 = copy_to_mode_reg (mode1, op1);
15027 /* Note that for many of the relevant operations (e.g. cmpne or
15028 cmpeq) with float or double operands, it makes more sense for the
15029 mode of the allocated scratch register to select a vector of
15030 integers. But the choice to copy the mode of operand 0 was made
15031 long ago and there are no plans to change it. */
15032 scratch = gen_reg_rtx (mode0);
15034 pat = GEN_FCN (icode) (scratch, op0, op1);
15035 if (! pat)
15036 return 0;
15037 emit_insn (pat);
15039 /* The vec_any* and vec_all* predicates use the same opcodes for two
15040 different operations, but the bits in CR6 will be different
15041 depending on what information we want. So we have to play tricks
15042 with CR6 to get the right bits out.
15044 If you think this is disgusting, look at the specs for the
15045 AltiVec predicates. */
15047 switch (cr6_form_int)
15049 case 0:
15050 emit_insn (gen_cr6_test_for_zero (target));
15051 break;
15052 case 1:
15053 emit_insn (gen_cr6_test_for_zero_reverse (target));
15054 break;
15055 case 2:
15056 emit_insn (gen_cr6_test_for_lt (target));
15057 break;
15058 case 3:
15059 emit_insn (gen_cr6_test_for_lt_reverse (target));
15060 break;
15061 default:
15062 error ("argument 1 of __builtin_altivec_predicate is out of range");
15063 break;
15066 return target;
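/* For reference, the cr6_form values above correspond to the __CR6_*
   constants in altivec.h (__CR6_EQ 0, __CR6_EQ_REV 1, __CR6_LT 2,
   __CR6_LT_REV 3), so for example

     vec_all_eq (a, b)  ->  __builtin_altivec_vcmpequw_p (__CR6_LT, a, b)
     vec_any_eq (a, b)  ->  __builtin_altivec_vcmpequw_p (__CR6_EQ_REV, a, b)

   (shown for vector int; other types pick the matching vcmp*_p).  */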
15069 static rtx
15070 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
15072 rtx pat, addr;
15073 tree arg0 = CALL_EXPR_ARG (exp, 0);
15074 tree arg1 = CALL_EXPR_ARG (exp, 1);
15075 machine_mode tmode = insn_data[icode].operand[0].mode;
15076 machine_mode mode0 = Pmode;
15077 machine_mode mode1 = Pmode;
15078 rtx op0 = expand_normal (arg0);
15079 rtx op1 = expand_normal (arg1);
15081 if (icode == CODE_FOR_nothing)
15082 /* Builtin not supported on this processor. */
15083 return 0;
15085 /* If we got invalid arguments, bail out before generating bad rtl. */
15086 if (arg0 == error_mark_node || arg1 == error_mark_node)
15087 return const0_rtx;
15089 if (target == 0
15090 || GET_MODE (target) != tmode
15091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15092 target = gen_reg_rtx (tmode);
15094 op1 = copy_to_mode_reg (mode1, op1);
15096 if (op0 == const0_rtx)
15098 addr = gen_rtx_MEM (tmode, op1);
15100 else
15102 op0 = copy_to_mode_reg (mode0, op0);
15103 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
15106 pat = GEN_FCN (icode) (target, addr);
15108 if (! pat)
15109 return 0;
15110 emit_insn (pat);
15112 return target;
15115 /* Return a constant vector for use as a little-endian permute control vector
15116 to reverse the order of elements of the given vector mode. */
15117 static rtx
15118 swap_selector_for_mode (machine_mode mode)
15120 /* These are little endian vectors, so their elements are reversed
15121 from what you would normally expect for a permute control vector. */
15122 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15123 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15124 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15125 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15126 unsigned int *swaparray, i;
15127 rtx perm[16];
15129 switch (mode)
15131 case E_V2DFmode:
15132 case E_V2DImode:
15133 swaparray = swap2;
15134 break;
15135 case E_V4SFmode:
15136 case E_V4SImode:
15137 swaparray = swap4;
15138 break;
15139 case E_V8HImode:
15140 swaparray = swap8;
15141 break;
15142 case E_V16QImode:
15143 swaparray = swap16;
15144 break;
15145 default:
15146 gcc_unreachable ();
15149 for (i = 0; i < 16; ++i)
15150 perm[i] = GEN_INT (swaparray[i]);
15152 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
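/* Note that each selector above describes an element reversal and is
   therefore an involution: permuting twice restores the original
   order.  That is why the lvx_be helper below can permute after the
   load while the stvx_be/stvex_be helpers permute before the store,
   all sharing the same selector.  */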
15155 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
15156 with -maltivec=be specified. Issue the load followed by an element-
15157 reversing permute. */
15158 void
15159 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15161 rtx tmp = gen_reg_rtx (mode);
15162 rtx load = gen_rtx_SET (tmp, op1);
15163 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15164 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
15165 rtx sel = swap_selector_for_mode (mode);
15166 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
15168 gcc_assert (REG_P (op0));
15169 emit_insn (par);
15170 emit_insn (gen_rtx_SET (op0, vperm));
15173 /* Generate code for a "stvxl" built-in for a little endian target with
15174 -maltivec=be specified. Issue the store preceded by an element-reversing
15175 permute. */
15176 void
15177 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15179 rtx tmp = gen_reg_rtx (mode);
15180 rtx store = gen_rtx_SET (op0, tmp);
15181 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15182 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
15183 rtx sel = swap_selector_for_mode (mode);
15184 rtx vperm;
15186 gcc_assert (REG_P (op1));
15187 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15188 emit_insn (gen_rtx_SET (tmp, vperm));
15189 emit_insn (par);
15192 /* Generate code for a "stve*x" built-in for a little endian target
15193 with -maltivec=be specified. Issue the store preceded by an
element-reversing permute. */
15194 void
15195 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15197 machine_mode inner_mode = GET_MODE_INNER (mode);
15198 rtx tmp = gen_reg_rtx (mode);
15199 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
15200 rtx sel = swap_selector_for_mode (mode);
15201 rtx vperm;
15203 gcc_assert (REG_P (op1));
15204 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15205 emit_insn (gen_rtx_SET (tmp, vperm));
15206 emit_insn (gen_rtx_SET (op0, stvx));
15209 static rtx
15210 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
15212 rtx pat, addr;
15213 tree arg0 = CALL_EXPR_ARG (exp, 0);
15214 tree arg1 = CALL_EXPR_ARG (exp, 1);
15215 machine_mode tmode = insn_data[icode].operand[0].mode;
15216 machine_mode mode0 = Pmode;
15217 machine_mode mode1 = Pmode;
15218 rtx op0 = expand_normal (arg0);
15219 rtx op1 = expand_normal (arg1);
15221 if (icode == CODE_FOR_nothing)
15222 /* Builtin not supported on this processor. */
15223 return 0;
15225 /* If we got invalid arguments, bail out before generating bad rtl. */
15226 if (arg0 == error_mark_node || arg1 == error_mark_node)
15227 return const0_rtx;
15229 if (target == 0
15230 || GET_MODE (target) != tmode
15231 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15232 target = gen_reg_rtx (tmode);
15234 op1 = copy_to_mode_reg (mode1, op1);
15236 /* For LVX, express the RTL accurately by ANDing the address with -16.
15237 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15238 so the raw address is fine. */
15239 if (icode == CODE_FOR_altivec_lvx_v2df_2op
15240 || icode == CODE_FOR_altivec_lvx_v2di_2op
15241 || icode == CODE_FOR_altivec_lvx_v4sf_2op
15242 || icode == CODE_FOR_altivec_lvx_v4si_2op
15243 || icode == CODE_FOR_altivec_lvx_v8hi_2op
15244 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
15246 rtx rawaddr;
15247 if (op0 == const0_rtx)
15248 rawaddr = op1;
15249 else
15251 op0 = copy_to_mode_reg (mode0, op0);
15252 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15254 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15255 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15257 /* For -maltivec=be, emit the load and follow it up with a
15258 permute to swap the elements. */
15259 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15261 rtx temp = gen_reg_rtx (tmode);
15262 emit_insn (gen_rtx_SET (temp, addr));
15264 rtx sel = swap_selector_for_mode (tmode);
15265 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15266 UNSPEC_VPERM);
15267 emit_insn (gen_rtx_SET (target, vperm));
15269 else
15270 emit_insn (gen_rtx_SET (target, addr));
15272 else
15274 if (op0 == const0_rtx)
15275 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15276 else
15278 op0 = copy_to_mode_reg (mode0, op0);
15279 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15280 gen_rtx_PLUS (Pmode, op1, op0));
15283 pat = GEN_FCN (icode) (target, addr);
15284 if (! pat)
15285 return 0;
15286 emit_insn (pat);
15289 return target;
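/* For reference, the LVX case above produces a memory operand of the
   shape (64-bit Pmode shown; a sketch, not verbatim output):

     (mem:V4SI (and:DI (plus:DI (reg op1) (reg op0))
                       (const_int -16)))

   which makes lvx's implicit 16-byte truncation of the address
   visible to the RTL optimizers.  */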
15292 static rtx
15293 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15295 tree arg0 = CALL_EXPR_ARG (exp, 0);
15296 tree arg1 = CALL_EXPR_ARG (exp, 1);
15297 tree arg2 = CALL_EXPR_ARG (exp, 2);
15298 rtx op0 = expand_normal (arg0);
15299 rtx op1 = expand_normal (arg1);
15300 rtx op2 = expand_normal (arg2);
15301 rtx pat;
15302 machine_mode mode0 = insn_data[icode].operand[0].mode;
15303 machine_mode mode1 = insn_data[icode].operand[1].mode;
15304 machine_mode mode2 = insn_data[icode].operand[2].mode;
15306 /* Invalid arguments. Bail before doing anything stupid! */
15307 if (arg0 == error_mark_node
15308 || arg1 == error_mark_node
15309 || arg2 == error_mark_node)
15310 return const0_rtx;
15312 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15313 op0 = copy_to_mode_reg (mode2, op0);
15314 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15315 op1 = copy_to_mode_reg (mode0, op1);
15316 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15317 op2 = copy_to_mode_reg (mode1, op2);
15319 pat = GEN_FCN (icode) (op1, op2, op0);
15320 if (pat)
15321 emit_insn (pat);
15322 return NULL_RTX;
15325 static rtx
15326 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15328 tree arg0 = CALL_EXPR_ARG (exp, 0);
15329 tree arg1 = CALL_EXPR_ARG (exp, 1);
15330 tree arg2 = CALL_EXPR_ARG (exp, 2);
15331 rtx op0 = expand_normal (arg0);
15332 rtx op1 = expand_normal (arg1);
15333 rtx op2 = expand_normal (arg2);
15334 rtx pat, addr;
15335 machine_mode tmode = insn_data[icode].operand[0].mode;
15336 machine_mode mode1 = Pmode;
15337 machine_mode mode2 = Pmode;
15339 /* Invalid arguments. Bail before doing anything stupid! */
15340 if (arg0 == error_mark_node
15341 || arg1 == error_mark_node
15342 || arg2 == error_mark_node)
15343 return const0_rtx;
15345 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15346 op0 = copy_to_mode_reg (tmode, op0);
15348 op2 = copy_to_mode_reg (mode2, op2);
15350 if (op1 == const0_rtx)
15352 addr = gen_rtx_MEM (tmode, op2);
15354 else
15356 op1 = copy_to_mode_reg (mode1, op1);
15357 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15360 pat = GEN_FCN (icode) (addr, op0);
15361 if (pat)
15362 emit_insn (pat);
15363 return NULL_RTX;
15366 static rtx
15367 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15369 rtx pat;
15370 tree arg0 = CALL_EXPR_ARG (exp, 0);
15371 tree arg1 = CALL_EXPR_ARG (exp, 1);
15372 tree arg2 = CALL_EXPR_ARG (exp, 2);
15373 rtx op0 = expand_normal (arg0);
15374 rtx op1 = expand_normal (arg1);
15375 rtx op2 = expand_normal (arg2);
15376 machine_mode mode0 = insn_data[icode].operand[0].mode;
15377 machine_mode mode1 = insn_data[icode].operand[1].mode;
15378 machine_mode mode2 = insn_data[icode].operand[2].mode;
15380 if (icode == CODE_FOR_nothing)
15381 /* Builtin not supported on this processor. */
15382 return NULL_RTX;
15384 /* If we got invalid arguments, bail out before generating bad rtl. */
15385 if (arg0 == error_mark_node
15386 || arg1 == error_mark_node
15387 || arg2 == error_mark_node)
15388 return NULL_RTX;
15390 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15391 op0 = copy_to_mode_reg (mode0, op0);
15392 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15393 op1 = copy_to_mode_reg (mode1, op1);
15394 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15395 op2 = copy_to_mode_reg (mode2, op2);
15397 pat = GEN_FCN (icode) (op0, op1, op2);
15398 if (pat)
15399 emit_insn (pat);
15401 return NULL_RTX;
15404 static rtx
15405 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15407 tree arg0 = CALL_EXPR_ARG (exp, 0);
15408 tree arg1 = CALL_EXPR_ARG (exp, 1);
15409 tree arg2 = CALL_EXPR_ARG (exp, 2);
15410 rtx op0 = expand_normal (arg0);
15411 rtx op1 = expand_normal (arg1);
15412 rtx op2 = expand_normal (arg2);
15413 rtx pat, addr, rawaddr;
15414 machine_mode tmode = insn_data[icode].operand[0].mode;
15415 machine_mode smode = insn_data[icode].operand[1].mode;
15416 machine_mode mode1 = Pmode;
15417 machine_mode mode2 = Pmode;
15419 /* Invalid arguments. Bail before doing anything stupid! */
15420 if (arg0 == error_mark_node
15421 || arg1 == error_mark_node
15422 || arg2 == error_mark_node)
15423 return const0_rtx;
15425 op2 = copy_to_mode_reg (mode2, op2);
15427 /* For STVX, express the RTL accurately by ANDing the address with -16.
15428 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15429 so the raw address is fine. */
15430 if (icode == CODE_FOR_altivec_stvx_v2df_2op
15431 || icode == CODE_FOR_altivec_stvx_v2di_2op
15432 || icode == CODE_FOR_altivec_stvx_v4sf_2op
15433 || icode == CODE_FOR_altivec_stvx_v4si_2op
15434 || icode == CODE_FOR_altivec_stvx_v8hi_2op
15435 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15437 if (op1 == const0_rtx)
15438 rawaddr = op2;
15439 else
15441 op1 = copy_to_mode_reg (mode1, op1);
15442 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15445 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15446 addr = gen_rtx_MEM (tmode, addr);
15448 op0 = copy_to_mode_reg (tmode, op0);
15450 /* For -maltivec=be, emit a permute to swap the elements, followed
15451 by the store. */
15452 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15454 rtx temp = gen_reg_rtx (tmode);
15455 rtx sel = swap_selector_for_mode (tmode);
15456 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15457 UNSPEC_VPERM);
15458 emit_insn (gen_rtx_SET (temp, vperm));
15459 emit_insn (gen_rtx_SET (addr, temp));
15461 else
15462 emit_insn (gen_rtx_SET (addr, op0));
15464 else
15466 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15467 op0 = copy_to_mode_reg (smode, op0);
15469 if (op1 == const0_rtx)
15470 addr = gen_rtx_MEM (tmode, op2);
15471 else
15473 op1 = copy_to_mode_reg (mode1, op1);
15474 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15477 pat = GEN_FCN (icode) (addr, op0);
15478 if (pat)
15479 emit_insn (pat);
15482 return NULL_RTX;
15485 /* Return the appropriate SPR number associated with the given builtin. */
15486 static inline HOST_WIDE_INT
15487 htm_spr_num (enum rs6000_builtins code)
15489 if (code == HTM_BUILTIN_GET_TFHAR
15490 || code == HTM_BUILTIN_SET_TFHAR)
15491 return TFHAR_SPR;
15492 else if (code == HTM_BUILTIN_GET_TFIAR
15493 || code == HTM_BUILTIN_SET_TFIAR)
15494 return TFIAR_SPR;
15495 else if (code == HTM_BUILTIN_GET_TEXASR
15496 || code == HTM_BUILTIN_SET_TEXASR)
15497 return TEXASR_SPR;
15498 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15499 || code == HTM_BUILTIN_SET_TEXASRU);
15500 return TEXASRU_SPR;
15503 /* Return the appropriate SPR regno associated with the given builtin. */
15504 static inline HOST_WIDE_INT
15505 htm_spr_regno (enum rs6000_builtins code)
15507 if (code == HTM_BUILTIN_GET_TFHAR
15508 || code == HTM_BUILTIN_SET_TFHAR)
15509 return TFHAR_REGNO;
15510 else if (code == HTM_BUILTIN_GET_TFIAR
15511 || code == HTM_BUILTIN_SET_TFIAR)
15512 return TFIAR_REGNO;
15513 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15514 || code == HTM_BUILTIN_SET_TEXASR
15515 || code == HTM_BUILTIN_GET_TEXASRU
15516 || code == HTM_BUILTIN_SET_TEXASRU);
15517 return TEXASR_REGNO;
15520 /* Return the correct ICODE value depending on whether we are
15521 setting or reading the HTM SPRs. */
15522 static inline enum insn_code
15523 rs6000_htm_spr_icode (bool nonvoid)
15525 if (nonvoid)
15526 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15527 else
15528 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15531 /* Expand the HTM builtin in EXP and store the result in TARGET.
15532 Store true in *EXPANDEDP if we found a builtin to expand. */
15533 static rtx
15534 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15536 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15537 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15538 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15539 const struct builtin_description *d;
15540 size_t i;
15542 *expandedp = true;
15544 if (!TARGET_POWERPC64
15545 && (fcode == HTM_BUILTIN_TABORTDC
15546 || fcode == HTM_BUILTIN_TABORTDCI))
15548 size_t uns_fcode = (size_t)fcode;
15549 const char *name = rs6000_builtin_info[uns_fcode].name;
15550 error ("builtin %s is only valid in 64-bit mode", name);
15551 return const0_rtx;
15554 /* Expand the HTM builtins. */
15555 d = bdesc_htm;
15556 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15557 if (d->code == fcode)
15559 rtx op[MAX_HTM_OPERANDS], pat;
15560 int nopnds = 0;
15561 tree arg;
15562 call_expr_arg_iterator iter;
15563 unsigned attr = rs6000_builtin_info[fcode].attr;
15564 enum insn_code icode = d->icode;
15565 const struct insn_operand_data *insn_op;
15566 bool uses_spr = (attr & RS6000_BTC_SPR);
15567 rtx cr = NULL_RTX;
15569 if (uses_spr)
15570 icode = rs6000_htm_spr_icode (nonvoid);
15571 insn_op = &insn_data[icode].operand[0];
15573 if (nonvoid)
15575 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
15576 if (!target
15577 || GET_MODE (target) != tmode
15578 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15579 target = gen_reg_rtx (tmode);
15580 if (uses_spr)
15581 op[nopnds++] = target;
15584 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15586 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15587 return const0_rtx;
15589 insn_op = &insn_data[icode].operand[nopnds];
15591 op[nopnds] = expand_normal (arg);
15593 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15595 if (!strcmp (insn_op->constraint, "n"))
15597 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15598 if (!CONST_INT_P (op[nopnds]))
15599 error ("argument %d must be an unsigned literal", arg_num);
15600 else
15601 error ("argument %d is an unsigned literal that is "
15602 "out of range", arg_num);
15603 return const0_rtx;
15605 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15608 nopnds++;
15611 /* Handle the builtins for extended mnemonics. These accept
15612 no arguments, but map to builtins that take arguments. */
15613 switch (fcode)
15615 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15616 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15617 op[nopnds++] = GEN_INT (1);
15618 if (flag_checking)
15619 attr |= RS6000_BTC_UNARY;
15620 break;
15621 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15622 op[nopnds++] = GEN_INT (0);
15623 if (flag_checking)
15624 attr |= RS6000_BTC_UNARY;
15625 break;
15626 default:
15627 break;
15630 /* If this builtin accesses SPRs, then pass in the appropriate
15631 SPR number and SPR regno as the last two operands. */
15632 if (uses_spr)
15634 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15635 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15636 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15638 /* If this builtin accesses a CR, then pass in a scratch
15639 CR as the last operand. */
15640 else if (attr & RS6000_BTC_CR)
15641 {
cr = gen_reg_rtx (CCmode);
15642 op[nopnds++] = cr;
15645 if (flag_checking)
15647 int expected_nopnds = 0;
15648 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15649 expected_nopnds = 1;
15650 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15651 expected_nopnds = 2;
15652 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15653 expected_nopnds = 3;
15654 if (!(attr & RS6000_BTC_VOID))
15655 expected_nopnds += 1;
15656 if (uses_spr)
15657 expected_nopnds += 2;
15659 gcc_assert (nopnds == expected_nopnds
15660 && nopnds <= MAX_HTM_OPERANDS);
15663 switch (nopnds)
15665 case 1:
15666 pat = GEN_FCN (icode) (op[0]);
15667 break;
15668 case 2:
15669 pat = GEN_FCN (icode) (op[0], op[1]);
15670 break;
15671 case 3:
15672 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15673 break;
15674 case 4:
15675 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15676 break;
15677 default:
15678 gcc_unreachable ();
15680 if (!pat)
15681 return NULL_RTX;
15682 emit_insn (pat);
15684 if (attr & RS6000_BTC_CR)
15686 if (fcode == HTM_BUILTIN_TBEGIN)
15688 /* Emit code to set TARGET to true or false depending on
15689 whether the tbegin. instruction succeeded or failed
15690 to start a transaction. We do this by placing the 1's
15691 complement of CR's EQ bit into TARGET. */
15692 rtx scratch = gen_reg_rtx (SImode);
15693 emit_insn (gen_rtx_SET (scratch,
15694 gen_rtx_EQ (SImode, cr,
15695 const0_rtx)));
15696 emit_insn (gen_rtx_SET (target,
15697 gen_rtx_XOR (SImode, scratch,
15698 GEN_INT (1))));
15700 else
15702 /* Emit code to copy the 4-bit condition register field
15703 CR into the least significant end of register TARGET. */
15704 rtx scratch1 = gen_reg_rtx (SImode);
15705 rtx scratch2 = gen_reg_rtx (SImode);
15706 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15707 emit_insn (gen_movcc (subreg, cr));
15708 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15709 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15713 if (nonvoid)
15714 return target;
15715 return const0_rtx;
15718 *expandedp = false;
15719 return NULL_RTX;
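/* Usage sketch for the CR0 handling above (the builtins are real;
   the surrounding code is illustrative):

     if (__builtin_tbegin (0))
       {
         ...transactional code...
         __builtin_tend (0);
       }

   __builtin_tbegin yields the complement of the EQ bit that tbegin.
   sets in CR0, i.e. nonzero when the transaction started.  */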
15722 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15724 static rtx
15725 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15726 rtx target)
15728 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15729 if (fcode == RS6000_BUILTIN_CPU_INIT)
15730 return const0_rtx;
15732 if (target == 0 || GET_MODE (target) != SImode)
15733 target = gen_reg_rtx (SImode);
15735 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15736 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15737 if (TREE_CODE (arg) != STRING_CST)
15739 error ("builtin %s only accepts a string argument",
15740 rs6000_builtin_info[(size_t) fcode].name);
15741 return const0_rtx;
15744 if (fcode == RS6000_BUILTIN_CPU_IS)
15746 const char *cpu = TREE_STRING_POINTER (arg);
15747 rtx cpuid = NULL_RTX;
15748 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15749 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15751 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15752 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15753 break;
15755 if (cpuid == NULL_RTX)
15757 /* Invalid CPU argument. */
15758 error ("cpu %s is an invalid argument to builtin %s",
15759 cpu, rs6000_builtin_info[(size_t) fcode].name);
15760 return const0_rtx;
15763 rtx platform = gen_reg_rtx (SImode);
15764 rtx tcbmem = gen_const_mem (SImode,
15765 gen_rtx_PLUS (Pmode,
15766 gen_rtx_REG (Pmode, TLS_REGNUM),
15767 GEN_INT (TCB_PLATFORM_OFFSET)));
15768 emit_move_insn (platform, tcbmem);
15769 emit_insn (gen_eqsi3 (target, platform, cpuid));
15771 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15773 const char *hwcap = TREE_STRING_POINTER (arg);
15774 rtx mask = NULL_RTX;
15775 int hwcap_offset;
15776 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15777 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15779 mask = GEN_INT (cpu_supports_info[i].mask);
15780 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15781 break;
15783 if (mask == NULL_RTX)
15785 /* Invalid HWCAP argument. */
15786 error ("hwcap %s is an invalid argument to builtin %s",
15787 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15788 return const0_rtx;
15791 rtx tcb_hwcap = gen_reg_rtx (SImode);
15792 rtx tcbmem = gen_const_mem (SImode,
15793 gen_rtx_PLUS (Pmode,
15794 gen_rtx_REG (Pmode, TLS_REGNUM),
15795 GEN_INT (hwcap_offset)));
15796 emit_move_insn (tcb_hwcap, tcbmem);
15797 rtx scratch1 = gen_reg_rtx (SImode);
15798 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15799 rtx scratch2 = gen_reg_rtx (SImode);
15800 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15801 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15804 /* Record that we have expanded a CPU builtin, so that we can later
15805 emit a reference to the special symbol exported by LIBC to ensure we
15806 do not link against an old LIBC that doesn't support this feature. */
15807 cpu_builtin_p = true;
15809 #else
15810 /* For old LIBCs, always return FALSE. */
15811 emit_move_insn (target, GEN_INT (0));
15812 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15814 return target;
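/* Usage sketch (the builtins and the "power8"/"vsx" strings are real;
   see cpu_is_info[] and cpu_supports_info[]):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power8"))
       ...
     if (__builtin_cpu_supports ("vsx"))
       ...

   Each test compiles to a TCB load followed by a compare (cpu_is) or
   an AND with the feature mask (cpu_supports), as generated above.  */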
15817 static rtx
15818 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15820 rtx pat;
15821 tree arg0 = CALL_EXPR_ARG (exp, 0);
15822 tree arg1 = CALL_EXPR_ARG (exp, 1);
15823 tree arg2 = CALL_EXPR_ARG (exp, 2);
15824 rtx op0 = expand_normal (arg0);
15825 rtx op1 = expand_normal (arg1);
15826 rtx op2 = expand_normal (arg2);
15827 machine_mode tmode = insn_data[icode].operand[0].mode;
15828 machine_mode mode0 = insn_data[icode].operand[1].mode;
15829 machine_mode mode1 = insn_data[icode].operand[2].mode;
15830 machine_mode mode2 = insn_data[icode].operand[3].mode;
15832 if (icode == CODE_FOR_nothing)
15833 /* Builtin not supported on this processor. */
15834 return 0;
15836 /* If we got invalid arguments, bail out before generating bad rtl. */
15837 if (arg0 == error_mark_node
15838 || arg1 == error_mark_node
15839 || arg2 == error_mark_node)
15840 return const0_rtx;
15842 /* Check and prepare argument depending on the instruction code.
15844 Note that a switch statement instead of the sequence of tests
15845 would be invalid, since many of the CODE_FOR values could be
15846 CODE_FOR_nothing, and duplicate case labels with identical
15847 values would not compile. (If icode were CODE_FOR_nothing we
15848 would never reach this point at run time anyway.) */
15849 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15850 || icode == CODE_FOR_altivec_vsldoi_v2df
15851 || icode == CODE_FOR_altivec_vsldoi_v4si
15852 || icode == CODE_FOR_altivec_vsldoi_v8hi
15853 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15855 /* Only allow 4-bit unsigned literals. */
15856 STRIP_NOPS (arg2);
15857 if (TREE_CODE (arg2) != INTEGER_CST
15858 || TREE_INT_CST_LOW (arg2) & ~0xf)
15860 error ("argument 3 must be a 4-bit unsigned literal");
15861 return CONST0_RTX (tmode);
15864 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15865 || icode == CODE_FOR_vsx_xxpermdi_v2di
15866 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15867 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15868 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15869 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15870 || icode == CODE_FOR_vsx_xxpermdi_v4si
15871 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15872 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15873 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15874 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15875 || icode == CODE_FOR_vsx_xxsldwi_v4si
15876 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15877 || icode == CODE_FOR_vsx_xxsldwi_v2di
15878 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15880 /* Only allow 2-bit unsigned literals. */
15881 STRIP_NOPS (arg2);
15882 if (TREE_CODE (arg2) != INTEGER_CST
15883 || TREE_INT_CST_LOW (arg2) & ~0x3)
15885 error ("argument 3 must be a 2-bit unsigned literal");
15886 return CONST0_RTX (tmode);
15889 else if (icode == CODE_FOR_vsx_set_v2df
15890 || icode == CODE_FOR_vsx_set_v2di
15891 || icode == CODE_FOR_bcdadd
15892 || icode == CODE_FOR_bcdadd_lt
15893 || icode == CODE_FOR_bcdadd_eq
15894 || icode == CODE_FOR_bcdadd_gt
15895 || icode == CODE_FOR_bcdsub
15896 || icode == CODE_FOR_bcdsub_lt
15897 || icode == CODE_FOR_bcdsub_eq
15898 || icode == CODE_FOR_bcdsub_gt)
15900 /* Only allow 1-bit unsigned literals. */
15901 STRIP_NOPS (arg2);
15902 if (TREE_CODE (arg2) != INTEGER_CST
15903 || TREE_INT_CST_LOW (arg2) & ~0x1)
15905 error ("argument 3 must be a 1-bit unsigned literal");
15906 return CONST0_RTX (tmode);
15909 else if (icode == CODE_FOR_dfp_ddedpd_dd
15910 || icode == CODE_FOR_dfp_ddedpd_td)
15912 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15913 STRIP_NOPS (arg0);
15914 if (TREE_CODE (arg0) != INTEGER_CST
15915 || TREE_INT_CST_LOW (arg0) & ~0x3)
15917 error ("argument 1 must be 0 or 2");
15918 return CONST0_RTX (tmode);
15921 else if (icode == CODE_FOR_dfp_denbcd_dd
15922 || icode == CODE_FOR_dfp_denbcd_td)
15924 /* Only allow 1-bit unsigned literals. */
15925 STRIP_NOPS (arg0);
15926 if (TREE_CODE (arg0) != INTEGER_CST
15927 || TREE_INT_CST_LOW (arg0) & ~0x1)
15929 error ("argument 1 must be a 1-bit unsigned literal");
15930 return CONST0_RTX (tmode);
15933 else if (icode == CODE_FOR_dfp_dscli_dd
15934 || icode == CODE_FOR_dfp_dscli_td
15935 || icode == CODE_FOR_dfp_dscri_dd
15936 || icode == CODE_FOR_dfp_dscri_td)
15938 /* Only allow 6-bit unsigned literals. */
15939 STRIP_NOPS (arg1);
15940 if (TREE_CODE (arg1) != INTEGER_CST
15941 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15943 error ("argument 2 must be a 6-bit unsigned literal");
15944 return CONST0_RTX (tmode);
15947 else if (icode == CODE_FOR_crypto_vshasigmaw
15948 || icode == CODE_FOR_crypto_vshasigmad)
15950 /* Check whether the 2nd and 3rd arguments are integer constants and in
15951 range and prepare arguments. */
15952 STRIP_NOPS (arg1);
15953 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
15955 error ("argument 2 must be 0 or 1");
15956 return CONST0_RTX (tmode);
15959 STRIP_NOPS (arg2);
15960 if (TREE_CODE (arg2) != INTEGER_CST
15961 || wi::geu_p (wi::to_wide (arg2), 16))
15963 error ("argument 3 must be in the range 0..15");
15964 return CONST0_RTX (tmode);
15968 if (target == 0
15969 || GET_MODE (target) != tmode
15970 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15971 target = gen_reg_rtx (tmode);
15973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15974 op0 = copy_to_mode_reg (mode0, op0);
15975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15976 op1 = copy_to_mode_reg (mode1, op1);
15977 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15978 op2 = copy_to_mode_reg (mode2, op2);
15980 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15981 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15982 else
15983 pat = GEN_FCN (icode) (target, op0, op1, op2);
15984 if (! pat)
15985 return 0;
15986 emit_insn (pat);
15988 return target;
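/* Source-level illustration of the 4-bit literal check above (a
   sketch; vec_sld maps to vsldoi, which encodes the shift count in
   the instruction):

     w = vec_sld (a, b, 3);   -- accepted, 0..15
     w = vec_sld (a, b, n);   -- rejected unless n folds to a constant:
          "argument 3 must be a 4-bit unsigned literal"  */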
15991 /* Expand the lvx builtins. */
15992 static rtx
15993 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15995 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15996 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15997 tree arg0;
15998 machine_mode tmode, mode0;
15999 rtx pat, op0;
16000 enum insn_code icode;
16002 switch (fcode)
16004 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
16005 icode = CODE_FOR_vector_altivec_load_v16qi;
16006 break;
16007 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
16008 icode = CODE_FOR_vector_altivec_load_v8hi;
16009 break;
16010 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
16011 icode = CODE_FOR_vector_altivec_load_v4si;
16012 break;
16013 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
16014 icode = CODE_FOR_vector_altivec_load_v4sf;
16015 break;
16016 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
16017 icode = CODE_FOR_vector_altivec_load_v2df;
16018 break;
16019 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
16020 icode = CODE_FOR_vector_altivec_load_v2di;
16021 break;
16022 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
16023 icode = CODE_FOR_vector_altivec_load_v1ti;
16024 break;
16025 default:
16026 *expandedp = false;
16027 return NULL_RTX;
16030 *expandedp = true;
16032 arg0 = CALL_EXPR_ARG (exp, 0);
16033 op0 = expand_normal (arg0);
16034 tmode = insn_data[icode].operand[0].mode;
16035 mode0 = insn_data[icode].operand[1].mode;
16037 if (target == 0
16038 || GET_MODE (target) != tmode
16039 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16040 target = gen_reg_rtx (tmode);
16042 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16043 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16045 pat = GEN_FCN (icode) (target, op0);
16046 if (! pat)
16047 return 0;
16048 emit_insn (pat);
16049 return target;
16052 /* Expand the stvx builtins. */
16053 static rtx
16054 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16055 bool *expandedp)
16057 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16058 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16059 tree arg0, arg1;
16060 machine_mode mode0, mode1;
16061 rtx pat, op0, op1;
16062 enum insn_code icode;
16064 switch (fcode)
16066 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
16067 icode = CODE_FOR_vector_altivec_store_v16qi;
16068 break;
16069 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
16070 icode = CODE_FOR_vector_altivec_store_v8hi;
16071 break;
16072 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
16073 icode = CODE_FOR_vector_altivec_store_v4si;
16074 break;
16075 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
16076 icode = CODE_FOR_vector_altivec_store_v4sf;
16077 break;
16078 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
16079 icode = CODE_FOR_vector_altivec_store_v2df;
16080 break;
16081 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
16082 icode = CODE_FOR_vector_altivec_store_v2di;
16083 break;
16084 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
16085 icode = CODE_FOR_vector_altivec_store_v1ti;
16086 break;
16087 default:
16088 *expandedp = false;
16089 return NULL_RTX;
16092 arg0 = CALL_EXPR_ARG (exp, 0);
16093 arg1 = CALL_EXPR_ARG (exp, 1);
16094 op0 = expand_normal (arg0);
16095 op1 = expand_normal (arg1);
16096 mode0 = insn_data[icode].operand[0].mode;
16097 mode1 = insn_data[icode].operand[1].mode;
16099 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16100 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16101 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16102 op1 = copy_to_mode_reg (mode1, op1);
16104 pat = GEN_FCN (icode) (op0, op1);
16105 if (pat)
16106 emit_insn (pat);
16108 *expandedp = true;
16109 return NULL_RTX;
16112 /* Expand the dst builtins. */
16113 static rtx
16114 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16115 bool *expandedp)
16117 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16118 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16119 tree arg0, arg1, arg2;
16120 machine_mode mode0, mode1;
16121 rtx pat, op0, op1, op2;
16122 const struct builtin_description *d;
16123 size_t i;
16125 *expandedp = false;
16127 /* Handle DST variants. */
16128 d = bdesc_dst;
16129 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16130 if (d->code == fcode)
16132 arg0 = CALL_EXPR_ARG (exp, 0);
16133 arg1 = CALL_EXPR_ARG (exp, 1);
16134 arg2 = CALL_EXPR_ARG (exp, 2);
16135 op0 = expand_normal (arg0);
16136 op1 = expand_normal (arg1);
16137 op2 = expand_normal (arg2);
16138 mode0 = insn_data[d->icode].operand[0].mode;
16139 mode1 = insn_data[d->icode].operand[1].mode;
16141 /* Invalid arguments, bail out before generating bad rtl. */
16142 if (arg0 == error_mark_node
16143 || arg1 == error_mark_node
16144 || arg2 == error_mark_node)
16145 return const0_rtx;
16147 *expandedp = true;
16148 STRIP_NOPS (arg2);
16149 if (TREE_CODE (arg2) != INTEGER_CST
16150 || TREE_INT_CST_LOW (arg2) & ~0x3)
16152 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
16153 return const0_rtx;
16156 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16157 op0 = copy_to_mode_reg (Pmode, op0);
16158 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16159 op1 = copy_to_mode_reg (mode1, op1);
16161 pat = GEN_FCN (d->icode) (op0, op1, op2);
16162 if (pat != 0)
16163 emit_insn (pat);
16165 return NULL_RTX;
16168 return NULL_RTX;
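/* Source-level illustration of the 2-bit literal check above (a
   sketch; the data-stream tag is encoded in the dst instruction):

     vec_dst (p, ctl, 0);   -- accepted, streams 0..3
     vec_dst (p, ctl, t);   -- rejected unless t folds to a constant,
          since the argument must be a 2-bit unsigned literal  */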
16171 /* Expand vec_init builtin. */
16172 static rtx
16173 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
16175 machine_mode tmode = TYPE_MODE (type);
16176 machine_mode inner_mode = GET_MODE_INNER (tmode);
16177 int i, n_elt = GET_MODE_NUNITS (tmode);
16179 gcc_assert (VECTOR_MODE_P (tmode));
16180 gcc_assert (n_elt == call_expr_nargs (exp));
16182 if (!target || !register_operand (target, tmode))
16183 target = gen_reg_rtx (tmode);
16185 /* If we have a vector consisting of a single element, such as V1TImode, do
16186 the initialization directly. */
16187 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
16189 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
16190 emit_move_insn (target, gen_lowpart (tmode, x));
16192 else
16194 rtvec v = rtvec_alloc (n_elt);
16196 for (i = 0; i < n_elt; ++i)
16198 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
16199 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16202 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
16205 return target;
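/* Usage sketch (a hypothetical call; the __builtin_vec_init_* names
   are registered elsewhere in this file, one scalar argument per
   vector element):

     vector int v = __builtin_vec_init_v4si (a, b, c, d);

   which the code above lowers through rs6000_expand_vector_init.  */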
16208 /* Return the integer constant in ARG. Constrain it to be in the range
16209 of the subparts of VEC_TYPE; issue an error if not. */
16211 static int
16212 get_element_number (tree vec_type, tree arg)
16214 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16216 if (!tree_fits_uhwi_p (arg)
16217 || (elt = tree_to_uhwi (arg), elt > max))
16219 error ("selector must be an integer constant in the range 0..%wi", max);
16220 return 0;
16223 return elt;
16226 /* Expand vec_set builtin. */
16227 static rtx
16228 altivec_expand_vec_set_builtin (tree exp)
16230 machine_mode tmode, mode1;
16231 tree arg0, arg1, arg2;
16232 int elt;
16233 rtx op0, op1;
16235 arg0 = CALL_EXPR_ARG (exp, 0);
16236 arg1 = CALL_EXPR_ARG (exp, 1);
16237 arg2 = CALL_EXPR_ARG (exp, 2);
16239 tmode = TYPE_MODE (TREE_TYPE (arg0));
16240 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16241 gcc_assert (VECTOR_MODE_P (tmode));
16243 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
16244 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
16245 elt = get_element_number (TREE_TYPE (arg0), arg2);
16247 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16248 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16250 op0 = force_reg (tmode, op0);
16251 op1 = force_reg (mode1, op1);
16253 rs6000_expand_vector_set (op0, op1, elt);
16255 return op0;
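/* Companion sketch (illustrative, compiled out; assumes the
   __builtin_vec_set_v4si spelling registered by altivec_init_builtins):  */
#if 0
vector int
set_lane_2 (vector int v, int x)
{
  /* The selector is validated by get_element_number against 0..3.  */
  return __builtin_vec_set_v4si (v, x, 2);
}
#endif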
16258 /* Expand vec_ext builtin. */
16259 static rtx
16260 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16262 machine_mode tmode, mode0;
16263 tree arg0, arg1;
16264 rtx op0;
16265 rtx op1;
16267 arg0 = CALL_EXPR_ARG (exp, 0);
16268 arg1 = CALL_EXPR_ARG (exp, 1);
16270 op0 = expand_normal (arg0);
16271 op1 = expand_normal (arg1);
16273 /* Call get_element_number to validate arg1 if it is a constant. */
16274 if (TREE_CODE (arg1) == INTEGER_CST)
16275 (void) get_element_number (TREE_TYPE (arg0), arg1);
16277 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16278 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16279 gcc_assert (VECTOR_MODE_P (mode0));
16281 op0 = force_reg (mode0, op0);
16283 if (optimize || !target || !register_operand (target, tmode))
16284 target = gen_reg_rtx (tmode);
16286 rs6000_expand_vector_extract (target, op0, op1);
16288 return target;
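/* And the extract direction (illustrative, compiled out; assumes the
   __builtin_vec_ext_v4si spelling; note that a non-constant selector
   is accepted here, unlike vec_set):  */
#if 0
int
get_lane (vector int v, int i)
{
  return __builtin_vec_ext_v4si (v, i);
}
#endif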
16291 /* Expand the builtin in EXP and store the result in TARGET. Store
16292 true in *EXPANDEDP if we found a builtin to expand. */
16293 static rtx
16294 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16296 const struct builtin_description *d;
16297 size_t i;
16298 enum insn_code icode;
16299 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16300 tree arg0, arg1, arg2;
16301 rtx op0, pat;
16302 machine_mode tmode, mode0;
16303 enum rs6000_builtins fcode
16304 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16306 if (rs6000_overloaded_builtin_p (fcode))
16308 *expandedp = true;
16309 error ("unresolved overload for Altivec builtin %qF", fndecl);
16311 /* Given it is invalid, just generate a normal call. */
16312 return expand_call (exp, target, false);
16315 target = altivec_expand_ld_builtin (exp, target, expandedp);
16316 if (*expandedp)
16317 return target;
16319 target = altivec_expand_st_builtin (exp, target, expandedp);
16320 if (*expandedp)
16321 return target;
16323 target = altivec_expand_dst_builtin (exp, target, expandedp);
16324 if (*expandedp)
16325 return target;
16327 *expandedp = true;
16329 switch (fcode)
16331 case ALTIVEC_BUILTIN_STVX_V2DF:
16332 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16333 case ALTIVEC_BUILTIN_STVX_V2DI:
16334 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16335 case ALTIVEC_BUILTIN_STVX_V4SF:
16336 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16337 case ALTIVEC_BUILTIN_STVX:
16338 case ALTIVEC_BUILTIN_STVX_V4SI:
16339 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16340 case ALTIVEC_BUILTIN_STVX_V8HI:
16341 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16342 case ALTIVEC_BUILTIN_STVX_V16QI:
16343 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16344 case ALTIVEC_BUILTIN_STVEBX:
16345 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16346 case ALTIVEC_BUILTIN_STVEHX:
16347 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16348 case ALTIVEC_BUILTIN_STVEWX:
16349 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16350 case ALTIVEC_BUILTIN_STVXL_V2DF:
16351 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16352 case ALTIVEC_BUILTIN_STVXL_V2DI:
16353 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16354 case ALTIVEC_BUILTIN_STVXL_V4SF:
16355 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16356 case ALTIVEC_BUILTIN_STVXL:
16357 case ALTIVEC_BUILTIN_STVXL_V4SI:
16358 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16359 case ALTIVEC_BUILTIN_STVXL_V8HI:
16360 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16361 case ALTIVEC_BUILTIN_STVXL_V16QI:
16362 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16364 case ALTIVEC_BUILTIN_STVLX:
16365 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16366 case ALTIVEC_BUILTIN_STVLXL:
16367 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16368 case ALTIVEC_BUILTIN_STVRX:
16369 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16370 case ALTIVEC_BUILTIN_STVRXL:
16371 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16373 case P9V_BUILTIN_STXVL:
16374 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16376 case VSX_BUILTIN_STXVD2X_V1TI:
16377 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16378 case VSX_BUILTIN_STXVD2X_V2DF:
16379 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16380 case VSX_BUILTIN_STXVD2X_V2DI:
16381 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16382 case VSX_BUILTIN_STXVW4X_V4SF:
16383 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16384 case VSX_BUILTIN_STXVW4X_V4SI:
16385 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16386 case VSX_BUILTIN_STXVW4X_V8HI:
16387 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16388 case VSX_BUILTIN_STXVW4X_V16QI:
16389 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16391 /* For the following on big endian, it's ok to use any appropriate
16392 unaligned-supporting store, so use a generic expander. For
16393 little-endian, the exact element-reversing instruction must
16394 be used. */
16395 case VSX_BUILTIN_ST_ELEMREV_V2DF:
16397 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16398 : CODE_FOR_vsx_st_elemrev_v2df);
16399 return altivec_expand_stv_builtin (code, exp);
16401 case VSX_BUILTIN_ST_ELEMREV_V2DI:
16403 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16404 : CODE_FOR_vsx_st_elemrev_v2di);
16405 return altivec_expand_stv_builtin (code, exp);
16407 case VSX_BUILTIN_ST_ELEMREV_V4SF:
16409 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16410 : CODE_FOR_vsx_st_elemrev_v4sf);
16411 return altivec_expand_stv_builtin (code, exp);
16413 case VSX_BUILTIN_ST_ELEMREV_V4SI:
16415 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16416 : CODE_FOR_vsx_st_elemrev_v4si);
16417 return altivec_expand_stv_builtin (code, exp);
16419 case VSX_BUILTIN_ST_ELEMREV_V8HI:
16421 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16422 : CODE_FOR_vsx_st_elemrev_v8hi);
16423 return altivec_expand_stv_builtin (code, exp);
16425 case VSX_BUILTIN_ST_ELEMREV_V16QI:
16427 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16428 : CODE_FOR_vsx_st_elemrev_v16qi);
16429 return altivec_expand_stv_builtin (code, exp);
16432 case ALTIVEC_BUILTIN_MFVSCR:
16433 icode = CODE_FOR_altivec_mfvscr;
16434 tmode = insn_data[icode].operand[0].mode;
16436 if (target == 0
16437 || GET_MODE (target) != tmode
16438 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16439 target = gen_reg_rtx (tmode);
16441 pat = GEN_FCN (icode) (target);
16442 if (! pat)
16443 return 0;
16444 emit_insn (pat);
16445 return target;
16447 case ALTIVEC_BUILTIN_MTVSCR:
16448 icode = CODE_FOR_altivec_mtvscr;
16449 arg0 = CALL_EXPR_ARG (exp, 0);
16450 op0 = expand_normal (arg0);
16451 mode0 = insn_data[icode].operand[0].mode;
16453 /* If we got invalid arguments, bail out before generating bad rtl. */
16454 if (arg0 == error_mark_node)
16455 return const0_rtx;
16457 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16458 op0 = copy_to_mode_reg (mode0, op0);
16460 pat = GEN_FCN (icode) (op0);
16461 if (pat)
16462 emit_insn (pat);
16463 return NULL_RTX;
16465 case ALTIVEC_BUILTIN_DSSALL:
16466 emit_insn (gen_altivec_dssall ());
16467 return NULL_RTX;
16469 case ALTIVEC_BUILTIN_DSS:
16470 icode = CODE_FOR_altivec_dss;
16471 arg0 = CALL_EXPR_ARG (exp, 0);
16472 STRIP_NOPS (arg0);
16473 op0 = expand_normal (arg0);
16474 mode0 = insn_data[icode].operand[0].mode;
16476 /* If we got invalid arguments, bail out before generating bad rtl. */
16477 if (arg0 == error_mark_node)
16478 return const0_rtx;
16480 if (TREE_CODE (arg0) != INTEGER_CST
16481 || TREE_INT_CST_LOW (arg0) & ~0x3)
16483 error ("argument to dss must be a 2-bit unsigned literal");
16484 return const0_rtx;
16487 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16488 op0 = copy_to_mode_reg (mode0, op0);
16490 emit_insn (gen_altivec_dss (op0));
16491 return NULL_RTX;
16493 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16494 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16495 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16496 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16497 case VSX_BUILTIN_VEC_INIT_V2DF:
16498 case VSX_BUILTIN_VEC_INIT_V2DI:
16499 case VSX_BUILTIN_VEC_INIT_V1TI:
16500 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16502 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16503 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16504 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16505 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16506 case VSX_BUILTIN_VEC_SET_V2DF:
16507 case VSX_BUILTIN_VEC_SET_V2DI:
16508 case VSX_BUILTIN_VEC_SET_V1TI:
16509 return altivec_expand_vec_set_builtin (exp);
16511 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16512 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16513 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16514 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16515 case VSX_BUILTIN_VEC_EXT_V2DF:
16516 case VSX_BUILTIN_VEC_EXT_V2DI:
16517 case VSX_BUILTIN_VEC_EXT_V1TI:
16518 return altivec_expand_vec_ext_builtin (exp, target);
16520 case P9V_BUILTIN_VEXTRACT4B:
16521 case P9V_BUILTIN_VEC_VEXTRACT4B:
16522 arg1 = CALL_EXPR_ARG (exp, 1);
16523 STRIP_NOPS (arg1);
16525 /* Generate a normal call if it is invalid. */
16526 if (arg1 == error_mark_node)
16527 return expand_call (exp, target, false);
16529 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16531 error ("second argument to vec_vextract4b must be 0..12");
16532 return expand_call (exp, target, false);
16534 break;
16536 case P9V_BUILTIN_VINSERT4B:
16537 case P9V_BUILTIN_VINSERT4B_DI:
16538 case P9V_BUILTIN_VEC_VINSERT4B:
16539 arg2 = CALL_EXPR_ARG (exp, 2);
16540 STRIP_NOPS (arg2);
16542 /* Generate a normal call if it is invalid. */
16543 if (arg2 == error_mark_node)
16544 return expand_call (exp, target, false);
16546 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16548 error ("third argument to vec_vinsert4b must be 0..12");
16549 return expand_call (exp, target, false);
16551 break;
16553 default:
16554 break;
16555 /* Fall through. */
16558 /* Expand abs* operations. */
16559 d = bdesc_abs;
16560 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16561 if (d->code == fcode)
16562 return altivec_expand_abs_builtin (d->icode, exp, target);
16564 /* Expand the AltiVec predicates. */
16565 d = bdesc_altivec_preds;
16566 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16567 if (d->code == fcode)
16568 return altivec_expand_predicate_builtin (d->icode, exp, target);
16570 /* LV* are funky. We initialized them differently. */
16571 switch (fcode)
16573 case ALTIVEC_BUILTIN_LVSL:
16574 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16575 exp, target, false);
16576 case ALTIVEC_BUILTIN_LVSR:
16577 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16578 exp, target, false);
16579 case ALTIVEC_BUILTIN_LVEBX:
16580 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16581 exp, target, false);
16582 case ALTIVEC_BUILTIN_LVEHX:
16583 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16584 exp, target, false);
16585 case ALTIVEC_BUILTIN_LVEWX:
16586 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16587 exp, target, false);
16588 case ALTIVEC_BUILTIN_LVXL_V2DF:
16589 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16590 exp, target, false);
16591 case ALTIVEC_BUILTIN_LVXL_V2DI:
16592 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16593 exp, target, false);
16594 case ALTIVEC_BUILTIN_LVXL_V4SF:
16595 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16596 exp, target, false);
16597 case ALTIVEC_BUILTIN_LVXL:
16598 case ALTIVEC_BUILTIN_LVXL_V4SI:
16599 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16600 exp, target, false);
16601 case ALTIVEC_BUILTIN_LVXL_V8HI:
16602 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16603 exp, target, false);
16604 case ALTIVEC_BUILTIN_LVXL_V16QI:
16605 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16606 exp, target, false);
16607 case ALTIVEC_BUILTIN_LVX_V2DF:
16608 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16609 exp, target, false);
16610 case ALTIVEC_BUILTIN_LVX_V2DI:
16611 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16612 exp, target, false);
16613 case ALTIVEC_BUILTIN_LVX_V4SF:
16614 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16615 exp, target, false);
16616 case ALTIVEC_BUILTIN_LVX:
16617 case ALTIVEC_BUILTIN_LVX_V4SI:
16618 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16619 exp, target, false);
16620 case ALTIVEC_BUILTIN_LVX_V8HI:
16621 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16622 exp, target, false);
16623 case ALTIVEC_BUILTIN_LVX_V16QI:
16624 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16625 exp, target, false);
16626 case ALTIVEC_BUILTIN_LVLX:
16627 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16628 exp, target, true);
16629 case ALTIVEC_BUILTIN_LVLXL:
16630 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16631 exp, target, true);
16632 case ALTIVEC_BUILTIN_LVRX:
16633 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16634 exp, target, true);
16635 case ALTIVEC_BUILTIN_LVRXL:
16636 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16637 exp, target, true);
16638 case VSX_BUILTIN_LXVD2X_V1TI:
16639 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16640 exp, target, false);
16641 case VSX_BUILTIN_LXVD2X_V2DF:
16642 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16643 exp, target, false);
16644 case VSX_BUILTIN_LXVD2X_V2DI:
16645 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16646 exp, target, false);
16647 case VSX_BUILTIN_LXVW4X_V4SF:
16648 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16649 exp, target, false);
16650 case VSX_BUILTIN_LXVW4X_V4SI:
16651 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16652 exp, target, false);
16653 case VSX_BUILTIN_LXVW4X_V8HI:
16654 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16655 exp, target, false);
16656 case VSX_BUILTIN_LXVW4X_V16QI:
16657 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16658 exp, target, false);
16659 /* For the following on big endian, it's ok to use any appropriate
16660 unaligned-supporting load, so use a generic expander. For
16661 little-endian, the exact element-reversing instruction must
16662 be used. */
16663 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16665 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16666 : CODE_FOR_vsx_ld_elemrev_v2df);
16667 return altivec_expand_lv_builtin (code, exp, target, false);
16669 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16671 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16672 : CODE_FOR_vsx_ld_elemrev_v2di);
16673 return altivec_expand_lv_builtin (code, exp, target, false);
16675 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16677 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16678 : CODE_FOR_vsx_ld_elemrev_v4sf);
16679 return altivec_expand_lv_builtin (code, exp, target, false);
16681 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16683 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16684 : CODE_FOR_vsx_ld_elemrev_v4si);
16685 return altivec_expand_lv_builtin (code, exp, target, false);
16687 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16689 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16690 : CODE_FOR_vsx_ld_elemrev_v8hi);
16691 return altivec_expand_lv_builtin (code, exp, target, false);
16693 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16695 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16696 : CODE_FOR_vsx_ld_elemrev_v16qi);
16697 return altivec_expand_lv_builtin (code, exp, target, false);
16699 break;
16700 default:
16701 break;
16702 /* Fall through. */
16705 *expandedp = false;
16706 return NULL_RTX;
16709 /* Expand the builtin in EXP and store the result in TARGET. Store
16710 true in *EXPANDEDP if we found a builtin to expand. */
16711 static rtx
16712 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16714 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16715 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16716 const struct builtin_description *d;
16717 size_t i;
16719 *expandedp = true;
16721 switch (fcode)
16723 case PAIRED_BUILTIN_STX:
16724 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16725 case PAIRED_BUILTIN_LX:
16726 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16727 default:
16728 break;
16729 /* Fall through. */
16732 /* Expand the paired predicates. */
16733 d = bdesc_paired_preds;
16734 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16735 if (d->code == fcode)
16736 return paired_expand_predicate_builtin (d->icode, exp, target);
16738 *expandedp = false;
16739 return NULL_RTX;
16742 /* Binops that need to be initialized manually, but can be expanded
16743 automagically by rs6000_expand_binop_builtin. */
16744 static const struct builtin_description bdesc_2arg_spe[] =
16746 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16747 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16748 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16749 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16750 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16751 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16752 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16753 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16754 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16755 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16756 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16757 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16758 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16759 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16760 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16761 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16762 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16763 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16764 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16765 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16766 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16767 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16770 /* Expand the builtin in EXP and store the result in TARGET. Store
16771 true in *EXPANDEDP if we found a builtin to expand.
16773 This expands the SPE builtins that are not simple unary and binary
16774 operations. */
16775 static rtx
16776 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16778 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16779 tree arg1, arg0;
16780 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16781 enum insn_code icode;
16782 machine_mode tmode, mode0;
16783 rtx pat, op0;
16784 const struct builtin_description *d;
16785 size_t i;
16787 *expandedp = true;
16789 /* Syntax check for a 5-bit unsigned immediate. */
16790 switch (fcode)
16792 case SPE_BUILTIN_EVSTDD:
16793 case SPE_BUILTIN_EVSTDH:
16794 case SPE_BUILTIN_EVSTDW:
16795 case SPE_BUILTIN_EVSTWHE:
16796 case SPE_BUILTIN_EVSTWHO:
16797 case SPE_BUILTIN_EVSTWWE:
16798 case SPE_BUILTIN_EVSTWWO:
16799 arg1 = CALL_EXPR_ARG (exp, 2);
16800 if (TREE_CODE (arg1) != INTEGER_CST
16801 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16803 error ("argument 2 must be a 5-bit unsigned literal");
16804 return const0_rtx;
16806 break;
16807 default:
16808 break;
16811 /* The evsplat*i instructions are not quite generic. */
16812 switch (fcode)
16814 case SPE_BUILTIN_EVSPLATFI:
16815 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16816 exp, target);
16817 case SPE_BUILTIN_EVSPLATI:
16818 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16819 exp, target);
16820 default:
16821 break;
16824 d = bdesc_2arg_spe;
16825 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16826 if (d->code == fcode)
16827 return rs6000_expand_binop_builtin (d->icode, exp, target);
16829 d = bdesc_spe_predicates;
16830 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16831 if (d->code == fcode)
16832 return spe_expand_predicate_builtin (d->icode, exp, target);
16834 d = bdesc_spe_evsel;
16835 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16836 if (d->code == fcode)
16837 return spe_expand_evsel_builtin (d->icode, exp, target);
16839 switch (fcode)
16841 case SPE_BUILTIN_EVSTDDX:
16842 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16843 case SPE_BUILTIN_EVSTDHX:
16844 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16845 case SPE_BUILTIN_EVSTDWX:
16846 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16847 case SPE_BUILTIN_EVSTWHEX:
16848 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16849 case SPE_BUILTIN_EVSTWHOX:
16850 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16851 case SPE_BUILTIN_EVSTWWEX:
16852 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16853 case SPE_BUILTIN_EVSTWWOX:
16854 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16855 case SPE_BUILTIN_EVSTDD:
16856 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16857 case SPE_BUILTIN_EVSTDH:
16858 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16859 case SPE_BUILTIN_EVSTDW:
16860 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16861 case SPE_BUILTIN_EVSTWHE:
16862 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16863 case SPE_BUILTIN_EVSTWHO:
16864 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16865 case SPE_BUILTIN_EVSTWWE:
16866 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16867 case SPE_BUILTIN_EVSTWWO:
16868 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16869 case SPE_BUILTIN_MFSPEFSCR:
16870 icode = CODE_FOR_spe_mfspefscr;
16871 tmode = insn_data[icode].operand[0].mode;
16873 if (target == 0
16874 || GET_MODE (target) != tmode
16875 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16876 target = gen_reg_rtx (tmode);
16878 pat = GEN_FCN (icode) (target);
16879 if (! pat)
16880 return 0;
16881 emit_insn (pat);
16882 return target;
16883 case SPE_BUILTIN_MTSPEFSCR:
16884 icode = CODE_FOR_spe_mtspefscr;
16885 arg0 = CALL_EXPR_ARG (exp, 0);
16886 op0 = expand_normal (arg0);
16887 mode0 = insn_data[icode].operand[0].mode;
16889 if (arg0 == error_mark_node)
16890 return const0_rtx;
16892 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16893 op0 = copy_to_mode_reg (mode0, op0);
16895 pat = GEN_FCN (icode) (op0);
16896 if (pat)
16897 emit_insn (pat);
16898 return NULL_RTX;
16899 default:
16900 break;
16903 *expandedp = false;
16904 return NULL_RTX;
16907 static rtx
16908 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16910 rtx pat, scratch, tmp;
16911 tree form = CALL_EXPR_ARG (exp, 0);
16912 tree arg0 = CALL_EXPR_ARG (exp, 1);
16913 tree arg1 = CALL_EXPR_ARG (exp, 2);
16914 rtx op0 = expand_normal (arg0);
16915 rtx op1 = expand_normal (arg1);
16916 machine_mode mode0 = insn_data[icode].operand[1].mode;
16917 machine_mode mode1 = insn_data[icode].operand[2].mode;
16918 int form_int;
16919 enum rtx_code code;
16921 if (TREE_CODE (form) != INTEGER_CST)
16923 error ("argument 1 of __builtin_paired_predicate must be a constant");
16924 return const0_rtx;
16926 else
16927 form_int = TREE_INT_CST_LOW (form);
16929 gcc_assert (mode0 == mode1);
16931 if (arg0 == error_mark_node || arg1 == error_mark_node)
16932 return const0_rtx;
16934 if (target == 0
16935 || GET_MODE (target) != SImode
16936 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16937 target = gen_reg_rtx (SImode);
16938 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16939 op0 = copy_to_mode_reg (mode0, op0);
16940 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16941 op1 = copy_to_mode_reg (mode1, op1);
16943 scratch = gen_reg_rtx (CCFPmode);
16945 pat = GEN_FCN (icode) (scratch, op0, op1);
16946 if (!pat)
16947 return const0_rtx;
16949 emit_insn (pat);
16951 switch (form_int)
16953 /* LT bit. */
16954 case 0:
16955 code = LT;
16956 break;
16957 /* GT bit. */
16958 case 1:
16959 code = GT;
16960 break;
16961 /* EQ bit. */
16962 case 2:
16963 code = EQ;
16964 break;
16965 /* UN bit. */
16966 case 3:
16967 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16968 return target;
16969 default:
16970 error ("argument 1 of __builtin_paired_predicate is out of range");
16971 return const0_rtx;
16974 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16975 emit_move_insn (target, tmp);
16976 return target;
16979 static rtx
16980 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16982 rtx pat, scratch, tmp;
16983 tree form = CALL_EXPR_ARG (exp, 0);
16984 tree arg0 = CALL_EXPR_ARG (exp, 1);
16985 tree arg1 = CALL_EXPR_ARG (exp, 2);
16986 rtx op0 = expand_normal (arg0);
16987 rtx op1 = expand_normal (arg1);
16988 machine_mode mode0 = insn_data[icode].operand[1].mode;
16989 machine_mode mode1 = insn_data[icode].operand[2].mode;
16990 int form_int;
16991 enum rtx_code code;
16993 if (TREE_CODE (form) != INTEGER_CST)
16995 error ("argument 1 of __builtin_spe_predicate must be a constant");
16996 return const0_rtx;
16998 else
16999 form_int = TREE_INT_CST_LOW (form);
17001 gcc_assert (mode0 == mode1);
17003 if (arg0 == error_mark_node || arg1 == error_mark_node)
17004 return const0_rtx;
17006 if (target == 0
17007 || GET_MODE (target) != SImode
17008 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
17009 target = gen_reg_rtx (SImode);
17011 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17012 op0 = copy_to_mode_reg (mode0, op0);
17013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17014 op1 = copy_to_mode_reg (mode1, op1);
17016 scratch = gen_reg_rtx (CCmode);
17018 pat = GEN_FCN (icode) (scratch, op0, op1);
17019 if (! pat)
17020 return const0_rtx;
17021 emit_insn (pat);
17023 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
17024 _lower_. We use one compare, but look in different bits of the
17025 CR for each variant.
17027 There are 2 elements in each SPE simd type (upper/lower). The CR
17028 bits are set as follows:
17030 BIT 0 | BIT 1 | BIT 2 | BIT 3
17031 U | L | (U | L) | (U & L)
17033 So, for an "all" relationship, BIT 3 would be set.
17034 For an "any" relationship, BIT 2 would be set. Etc.
17036 Following traditional nomenclature, these bits map to:
17038 BIT 0 | BIT 1 | BIT 2 | BIT 3
17039 LT | GT | EQ | OV
17041 Later, we will generate rtl to look in the OV/EQ/LT/GT bits (the
all, any, upper and lower variants, respectively).
17044 switch (form_int)
17046 /* All variant. OV bit. */
17047 case 0:
17048 /* We need to get to the OV bit, which is the ORDERED bit. We
17049 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
17050 that's ugly and will make validate_condition_mode die.
17051 So let's just use another pattern. */
17052 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
17053 return target;
17054 /* Any variant. EQ bit. */
17055 case 1:
17056 code = EQ;
17057 break;
17058 /* Upper variant. LT bit. */
17059 case 2:
17060 code = LT;
17061 break;
17062 /* Lower variant. GT bit. */
17063 case 3:
17064 code = GT;
17065 break;
17066 default:
17067 error ("argument 1 of __builtin_spe_predicate is out of range");
17068 return const0_rtx;
17071 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
17072 emit_move_insn (target, tmp);
17074 return target;
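/* The switch above implements a small fixed mapping; shown here as a
   standalone sketch (hypothetical helper, not referenced elsewhere):  */

static inline enum rtx_code
spe_predicate_form_to_code (int form_int)
{
  switch (form_int)
    {
    case 1:
      return EQ;  /* "any" variant.  */
    case 2:
      return LT;  /* "upper" variant.  */
    case 3:
      return GT;  /* "lower" variant.  */
    default:
      /* Form 0 ("all") does not map to a comparison code; it reads the
         OV bit directly via gen_move_from_CR_ov_bit.  */
      gcc_unreachable ();
    }
}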
17077 /* The evsel builtins look like this:
17079 e = __builtin_spe_evsel_OP (a, b, c, d);
17081 and work like this:
17083 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
17084 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
17087 static rtx
17088 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
17090 rtx pat, scratch;
17091 tree arg0 = CALL_EXPR_ARG (exp, 0);
17092 tree arg1 = CALL_EXPR_ARG (exp, 1);
17093 tree arg2 = CALL_EXPR_ARG (exp, 2);
17094 tree arg3 = CALL_EXPR_ARG (exp, 3);
17095 rtx op0 = expand_normal (arg0);
17096 rtx op1 = expand_normal (arg1);
17097 rtx op2 = expand_normal (arg2);
17098 rtx op3 = expand_normal (arg3);
17099 machine_mode mode0 = insn_data[icode].operand[1].mode;
17100 machine_mode mode1 = insn_data[icode].operand[2].mode;
17102 gcc_assert (mode0 == mode1);
17104 if (arg0 == error_mark_node || arg1 == error_mark_node
17105 || arg2 == error_mark_node || arg3 == error_mark_node)
17106 return const0_rtx;
17108 if (target == 0
17109 || GET_MODE (target) != mode0
17110 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
17111 target = gen_reg_rtx (mode0);
17113 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17114 op0 = copy_to_mode_reg (mode0, op0);
17115 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17116 op1 = copy_to_mode_reg (mode0, op1);
17117 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
17118 op2 = copy_to_mode_reg (mode0, op2);
17119 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
17120 op3 = copy_to_mode_reg (mode0, op3);
17122 /* Generate the compare. */
17123 scratch = gen_reg_rtx (CCmode);
17124 pat = GEN_FCN (icode) (scratch, op0, op1);
17125 if (! pat)
17126 return const0_rtx;
17127 emit_insn (pat);
17129 if (mode0 == V2SImode)
17130 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
17131 else
17132 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
17134 return target;
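/* A scalar reference model of the evsel semantics described above
   (illustration only; OP stands in for whichever predicate the
   builtin variant encodes):  */

static inline void
spe_evsel_reference (const int a[2], const int b[2], const int c[2],
                     const int d[2], int e[2], bool (*op) (int, int))
{
  for (int i = 0; i < 2; i++)
    e[i] = op (a[i], b[i]) ? c[i] : d[i];  /* i == 0 is the upper half.  */
}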
17137 /* Raise an error message for a builtin function that is called without the
17138 appropriate target options being set. */
17140 static void
17141 rs6000_invalid_builtin (enum rs6000_builtins fncode)
17143 size_t uns_fncode = (size_t)fncode;
17144 const char *name = rs6000_builtin_info[uns_fncode].name;
17145 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
17147 gcc_assert (name != NULL);
17148 if ((fnmask & RS6000_BTM_CELL) != 0)
17149 error ("Builtin function %s is only valid for the cell processor", name);
17150 else if ((fnmask & RS6000_BTM_VSX) != 0)
17151 error ("Builtin function %s requires the -mvsx option", name);
17152 else if ((fnmask & RS6000_BTM_HTM) != 0)
17153 error ("Builtin function %s requires the -mhtm option", name);
17154 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
17155 error ("Builtin function %s requires the -maltivec option", name);
17156 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
17157 error ("Builtin function %s requires the -mpaired option", name);
17158 else if ((fnmask & RS6000_BTM_SPE) != 0)
17159 error ("Builtin function %s requires the -mspe option", name);
17160 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17161 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17162 error ("Builtin function %s requires the -mhard-dfp and"
17163 " -mpower8-vector options", name);
17164 else if ((fnmask & RS6000_BTM_DFP) != 0)
17165 error ("Builtin function %s requires the -mhard-dfp option", name);
17166 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
17167 error ("Builtin function %s requires the -mpower8-vector option", name);
17168 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17169 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17170 error ("Builtin function %s requires the -mcpu=power9 and"
17171 " -m64 options", name);
17172 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
17173 error ("Builtin function %s requires the -mcpu=power9 option", name);
17174 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17175 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17176 error ("Builtin function %s requires the -mcpu=power9 and"
17177 " -m64 options", name);
17178 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
17179 error ("Builtin function %s requires the -mcpu=power9 option", name);
17180 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17181 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17182 error ("Builtin function %s requires the -mhard-float and"
17183 " -mlong-double-128 options", name);
17184 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
17185 error ("Builtin function %s requires the -mhard-float option", name);
17186 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
17187 error ("Builtin function %s requires the -mfloat128 option", name);
17188 else
17189 error ("Builtin function %s is not supported with the current options",
17190 name);
17193 /* Target hook for early folding of built-ins, shamelessly stolen
17194 from ia64.c. */
17196 static tree
17197 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
17198 tree *args, bool ignore ATTRIBUTE_UNUSED)
17200 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17202 enum rs6000_builtins fn_code
17203 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17204 switch (fn_code)
17206 case RS6000_BUILTIN_NANQ:
17207 case RS6000_BUILTIN_NANSQ:
17209 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17210 const char *str = c_getstr (*args);
17211 int quiet = fn_code == RS6000_BUILTIN_NANQ;
17212 REAL_VALUE_TYPE real;
17214 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17215 return build_real (type, real);
17216 return NULL_TREE;
17218 case RS6000_BUILTIN_INFQ:
17219 case RS6000_BUILTIN_HUGE_VALQ:
17221 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17222 REAL_VALUE_TYPE inf;
17223 real_inf (&inf);
17224 return build_real (type, inf);
17226 default:
17227 break;
17230 #ifdef SUBTARGET_FOLD_BUILTIN
17231 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17232 #else
17233 return NULL_TREE;
17234 #endif
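/* Effect of the NaN/infinity folds above (illustrative, compiled out):  */
#if 0
__float128 qnan (void) { return __builtin_nanq (""); }  /* Folds to a quiet-NaN REAL_CST.  */
__float128 pinf (void) { return __builtin_infq (); }    /* Folds to a +Inf REAL_CST.  */
#endif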
17237 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17238 a constant, use rs6000_fold_builtin.) */
17240 bool
17241 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17243 gimple *stmt = gsi_stmt (*gsi);
17244 tree fndecl = gimple_call_fndecl (stmt);
17245 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
17246 enum rs6000_builtins fn_code
17247 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17248 tree arg0, arg1, lhs;
17250 switch (fn_code)
17252 /* Flavors of vec_add. We deliberately don't expand
17253 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17254 TImode, resulting in much poorer code generation. */
17255 case ALTIVEC_BUILTIN_VADDUBM:
17256 case ALTIVEC_BUILTIN_VADDUHM:
17257 case ALTIVEC_BUILTIN_VADDUWM:
17258 case P8V_BUILTIN_VADDUDM:
17259 case ALTIVEC_BUILTIN_VADDFP:
17260 case VSX_BUILTIN_XVADDDP:
17262 arg0 = gimple_call_arg (stmt, 0);
17263 arg1 = gimple_call_arg (stmt, 1);
17264 lhs = gimple_call_lhs (stmt);
17265 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
17266 gimple_set_location (g, gimple_location (stmt));
17267 gsi_replace (gsi, g, true);
17268 return true;
17270 /* Flavors of vec_sub. We deliberately don't expand
17271 P8V_BUILTIN_VSUBUQM. */
17272 case ALTIVEC_BUILTIN_VSUBUBM:
17273 case ALTIVEC_BUILTIN_VSUBUHM:
17274 case ALTIVEC_BUILTIN_VSUBUWM:
17275 case P8V_BUILTIN_VSUBUDM:
17276 case ALTIVEC_BUILTIN_VSUBFP:
17277 case VSX_BUILTIN_XVSUBDP:
17279 arg0 = gimple_call_arg (stmt, 0);
17280 arg1 = gimple_call_arg (stmt, 1);
17281 lhs = gimple_call_lhs (stmt);
17282 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
17283 gimple_set_location (g, gimple_location (stmt));
17284 gsi_replace (gsi, g, true);
17285 return true;
17287 case VSX_BUILTIN_XVMULSP:
17288 case VSX_BUILTIN_XVMULDP:
17290 arg0 = gimple_call_arg (stmt, 0);
17291 arg1 = gimple_call_arg (stmt, 1);
17292 lhs = gimple_call_lhs (stmt);
17293 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
17294 gimple_set_location (g, gimple_location (stmt));
17295 gsi_replace (gsi, g, true);
17296 return true;
17298 /* Even element flavors of vec_mul (signed). */
17299 case ALTIVEC_BUILTIN_VMULESB:
17300 case ALTIVEC_BUILTIN_VMULESH:
17301 /* Even element flavors of vec_mul (unsigned). */
17302 case ALTIVEC_BUILTIN_VMULEUB:
17303 case ALTIVEC_BUILTIN_VMULEUH:
17305 arg0 = gimple_call_arg (stmt, 0);
17306 arg1 = gimple_call_arg (stmt, 1);
17307 lhs = gimple_call_lhs (stmt);
17308 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
17309 gimple_set_location (g, gimple_location (stmt));
17310 gsi_replace (gsi, g, true);
17311 return true;
17313 /* Odd element flavors of vec_mul (signed). */
17314 case ALTIVEC_BUILTIN_VMULOSB:
17315 case ALTIVEC_BUILTIN_VMULOSH:
17316 /* Odd element flavors of vec_mul (unsigned). */
17317 case ALTIVEC_BUILTIN_VMULOUB:
17318 case ALTIVEC_BUILTIN_VMULOUH:
17320 arg0 = gimple_call_arg (stmt, 0);
17321 arg1 = gimple_call_arg (stmt, 1);
17322 lhs = gimple_call_lhs (stmt);
17323 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
17324 gimple_set_location (g, gimple_location (stmt));
17325 gsi_replace (gsi, g, true);
17326 return true;
17328 /* Flavors of vec_div (Integer). */
17329 case VSX_BUILTIN_DIV_V2DI:
17330 case VSX_BUILTIN_UDIV_V2DI:
17332 arg0 = gimple_call_arg (stmt, 0);
17333 arg1 = gimple_call_arg (stmt, 1);
17334 lhs = gimple_call_lhs (stmt);
17335 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
17336 gimple_set_location (g, gimple_location (stmt));
17337 gsi_replace (gsi, g, true);
17338 return true;
17340 /* Flavors of vec_div (Float). */
17341 case VSX_BUILTIN_XVDIVSP:
17342 case VSX_BUILTIN_XVDIVDP:
17344 arg0 = gimple_call_arg (stmt, 0);
17345 arg1 = gimple_call_arg (stmt, 1);
17346 lhs = gimple_call_lhs (stmt);
17347 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
17348 gimple_set_location (g, gimple_location (stmt));
17349 gsi_replace (gsi, g, true);
17350 return true;
17352 /* Flavors of vec_and. */
17353 case ALTIVEC_BUILTIN_VAND:
17355 arg0 = gimple_call_arg (stmt, 0);
17356 arg1 = gimple_call_arg (stmt, 1);
17357 lhs = gimple_call_lhs (stmt);
17358 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
17359 gimple_set_location (g, gimple_location (stmt));
17360 gsi_replace (gsi, g, true);
17361 return true;
17363 /* Flavors of vec_andc. */
17364 case ALTIVEC_BUILTIN_VANDC:
17366 arg0 = gimple_call_arg (stmt, 0);
17367 arg1 = gimple_call_arg (stmt, 1);
17368 lhs = gimple_call_lhs (stmt);
17369 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17370 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17371 gimple_set_location (g, gimple_location (stmt));
17372 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17373 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
17374 gimple_set_location (g, gimple_location (stmt));
17375 gsi_replace (gsi, g, true);
17376 return true;
17378 /* Flavors of vec_nand. */
17379 case P8V_BUILTIN_VEC_NAND:
17380 case P8V_BUILTIN_NAND_V16QI:
17381 case P8V_BUILTIN_NAND_V8HI:
17382 case P8V_BUILTIN_NAND_V4SI:
17383 case P8V_BUILTIN_NAND_V4SF:
17384 case P8V_BUILTIN_NAND_V2DF:
17385 case P8V_BUILTIN_NAND_V2DI:
17387 arg0 = gimple_call_arg (stmt, 0);
17388 arg1 = gimple_call_arg (stmt, 1);
17389 lhs = gimple_call_lhs (stmt);
17390 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17391 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
17392 gimple_set_location (g, gimple_location (stmt));
17393 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17394 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17395 gimple_set_location (g, gimple_location (stmt));
17396 gsi_replace (gsi, g, true);
17397 return true;
17399 /* Flavors of vec_or. */
17400 case ALTIVEC_BUILTIN_VOR:
17402 arg0 = gimple_call_arg (stmt, 0);
17403 arg1 = gimple_call_arg (stmt, 1);
17404 lhs = gimple_call_lhs (stmt);
17405 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
17406 gimple_set_location (g, gimple_location (stmt));
17407 gsi_replace (gsi, g, true);
17408 return true;
17410 /* Flavors of vec_orc. */
17411 case P8V_BUILTIN_ORC_V16QI:
17412 case P8V_BUILTIN_ORC_V8HI:
17413 case P8V_BUILTIN_ORC_V4SI:
17414 case P8V_BUILTIN_ORC_V4SF:
17415 case P8V_BUILTIN_ORC_V2DF:
17416 case P8V_BUILTIN_ORC_V2DI:
17418 arg0 = gimple_call_arg (stmt, 0);
17419 arg1 = gimple_call_arg (stmt, 1);
17420 lhs = gimple_call_lhs (stmt);
17421 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17422 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17423 gimple_set_location (g, gimple_location (stmt));
17424 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17425 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
17426 gimple_set_location (g, gimple_location (stmt));
17427 gsi_replace (gsi, g, true);
17428 return true;
17430 /* Flavors of vec_xor. */
17431 case ALTIVEC_BUILTIN_VXOR:
17433 arg0 = gimple_call_arg (stmt, 0);
17434 arg1 = gimple_call_arg (stmt, 1);
17435 lhs = gimple_call_lhs (stmt);
17436 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
17437 gimple_set_location (g, gimple_location (stmt));
17438 gsi_replace (gsi, g, true);
17439 return true;
17441 /* Flavors of vec_nor. */
17442 case ALTIVEC_BUILTIN_VNOR:
17444 arg0 = gimple_call_arg (stmt, 0);
17445 arg1 = gimple_call_arg (stmt, 1);
17446 lhs = gimple_call_lhs (stmt);
17447 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17448 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
17449 gimple_set_location (g, gimple_location (stmt));
17450 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17451 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17452 gimple_set_location (g, gimple_location (stmt));
17453 gsi_replace (gsi, g, true);
17454 return true;
17456 default:
17457 break;
17460 return false;
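/* The binary-operator cases above all share one shape; a possible
   consolidation, sketched under the assumption that nothing else needs
   to vary per builtin (not wired in):  */

static bool
rs6000_gimple_fold_binop_call (gimple_stmt_iterator *gsi, enum tree_code code)
{
  gimple *stmt = gsi_stmt (*gsi);
  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), code,
                                   gimple_call_arg (stmt, 0),
                                   gimple_call_arg (stmt, 1));
  gimple_set_location (g, gimple_location (stmt));
  gsi_replace (gsi, g, true);
  return true;
}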
17463 /* Expand an expression EXP that calls a built-in function,
17464 with result going to TARGET if that's convenient
17465 (and in mode MODE if that's convenient).
17466 SUBTARGET may be used as the target for computing one of EXP's operands.
17467 IGNORE is nonzero if the value is to be ignored. */
17469 static rtx
17470 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17471 machine_mode mode ATTRIBUTE_UNUSED,
17472 int ignore ATTRIBUTE_UNUSED)
17474 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17475 enum rs6000_builtins fcode
17476 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
17477 size_t uns_fcode = (size_t)fcode;
17478 const struct builtin_description *d;
17479 size_t i;
17480 rtx ret;
17481 bool success;
17482 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
17483 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
17485 if (TARGET_DEBUG_BUILTIN)
17487 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
17488 const char *name1 = rs6000_builtin_info[uns_fcode].name;
17489 const char *name2 = ((icode != CODE_FOR_nothing)
17490 ? get_insn_name ((int)icode)
17491 : "nothing");
17492 const char *name3;
17494 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
17496 default: name3 = "unknown"; break;
17497 case RS6000_BTC_SPECIAL: name3 = "special"; break;
17498 case RS6000_BTC_UNARY: name3 = "unary"; break;
17499 case RS6000_BTC_BINARY: name3 = "binary"; break;
17500 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
17501 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
17502 case RS6000_BTC_ABS: name3 = "abs"; break;
17503 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
17504 case RS6000_BTC_DST: name3 = "dst"; break;
17508 fprintf (stderr,
17509 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17510 (name1) ? name1 : "---", fcode,
17511 (name2) ? name2 : "---", (int)icode,
17512 name3,
17513 func_valid_p ? "" : ", not valid");
17516 if (!func_valid_p)
17518 rs6000_invalid_builtin (fcode);
17520 /* Given it is invalid, just generate a normal call. */
17521 return expand_call (exp, target, ignore);
17524 switch (fcode)
17526 case RS6000_BUILTIN_RECIP:
17527 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
17529 case RS6000_BUILTIN_RECIPF:
17530 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
17532 case RS6000_BUILTIN_RSQRTF:
17533 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
17535 case RS6000_BUILTIN_RSQRT:
17536 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
17538 case POWER7_BUILTIN_BPERMD:
17539 return rs6000_expand_binop_builtin (((TARGET_64BIT)
17540 ? CODE_FOR_bpermd_di
17541 : CODE_FOR_bpermd_si), exp, target);
17543 case RS6000_BUILTIN_GET_TB:
17544 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
17545 target);
17547 case RS6000_BUILTIN_MFTB:
17548 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
17549 ? CODE_FOR_rs6000_mftb_di
17550 : CODE_FOR_rs6000_mftb_si),
17551 target);
17553 case RS6000_BUILTIN_MFFS:
17554 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
17556 case RS6000_BUILTIN_MTFSF:
17557 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
17559 case RS6000_BUILTIN_CPU_INIT:
17560 case RS6000_BUILTIN_CPU_IS:
17561 case RS6000_BUILTIN_CPU_SUPPORTS:
17562 return cpu_expand_builtin (fcode, exp, target);
17564 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
17565 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
17567 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
17568 : (int) CODE_FOR_altivec_lvsl_direct);
17569 machine_mode tmode = insn_data[icode].operand[0].mode;
17570 machine_mode mode = insn_data[icode].operand[1].mode;
17571 tree arg;
17572 rtx op, addr, pat;
17574 gcc_assert (TARGET_ALTIVEC);
17576 arg = CALL_EXPR_ARG (exp, 0);
17577 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
17578 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
17579 addr = memory_address (mode, op);
17580 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
17581 op = addr;
17582 else
17584 /* For the load case we need to negate the address. */
17585 op = gen_reg_rtx (GET_MODE (addr));
17586 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
17588 op = gen_rtx_MEM (mode, op);
17590 if (target == 0
17591 || GET_MODE (target) != tmode
17592 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17593 target = gen_reg_rtx (tmode);
17595 pat = GEN_FCN (icode) (target, op);
17596 if (!pat)
17597 return 0;
17598 emit_insn (pat);
17600 return target;
17603 case ALTIVEC_BUILTIN_VCFUX:
17604 case ALTIVEC_BUILTIN_VCFSX:
17605 case ALTIVEC_BUILTIN_VCTUXS:
17606 case ALTIVEC_BUILTIN_VCTSXS:
17607 /* FIXME: There's got to be a nicer way to handle this case than
17608 constructing a new CALL_EXPR. */
17609 if (call_expr_nargs (exp) == 1)
17611 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
17612 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
17614 break;
17616 default:
17617 break;
17620 if (TARGET_ALTIVEC)
17622 ret = altivec_expand_builtin (exp, target, &success);
17624 if (success)
17625 return ret;
17627 if (TARGET_SPE)
17629 ret = spe_expand_builtin (exp, target, &success);
17631 if (success)
17632 return ret;
17634 if (TARGET_PAIRED_FLOAT)
17636 ret = paired_expand_builtin (exp, target, &success);
17638 if (success)
17639 return ret;
17641 if (TARGET_HTM)
17643 ret = htm_expand_builtin (exp, target, &success);
17645 if (success)
17646 return ret;
17649 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17650 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17651 gcc_assert (attr == RS6000_BTC_UNARY
17652 || attr == RS6000_BTC_BINARY
17653 || attr == RS6000_BTC_TERNARY
17654 || attr == RS6000_BTC_SPECIAL);
17656 /* Handle simple unary operations. */
17657 d = bdesc_1arg;
17658 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17659 if (d->code == fcode)
17660 return rs6000_expand_unop_builtin (d->icode, exp, target);
17662 /* Handle simple binary operations. */
17663 d = bdesc_2arg;
17664 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17665 if (d->code == fcode)
17666 return rs6000_expand_binop_builtin (d->icode, exp, target);
17668 /* Handle simple ternary operations. */
17669 d = bdesc_3arg;
17670 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17671 if (d->code == fcode)
17672 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17674 /* Handle simple no-argument operations. */
17675 d = bdesc_0arg;
17676 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17677 if (d->code == fcode)
17678 return rs6000_expand_zeroop_builtin (d->icode, target);
17680 gcc_unreachable ();
17683 /* Create a builtin vector type with a name. Taking care not to give
17684 the canonical type a name. */
17686 static tree
17687 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
17689 tree result = build_vector_type (elt_type, num_elts);
17691 /* Copy so we don't give the canonical type a name. */
17692 result = build_variant_type_copy (result);
17694 add_builtin_type (name, result);
17696 return result;
17699 static void
17700 rs6000_init_builtins (void)
17702 tree tdecl;
17703 tree ftype;
17704 machine_mode mode;
17706 if (TARGET_DEBUG_BUILTIN)
17707 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17708 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17709 (TARGET_SPE) ? ", spe" : "",
17710 (TARGET_ALTIVEC) ? ", altivec" : "",
17711 (TARGET_VSX) ? ", vsx" : "");
17713 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17714 V2SF_type_node = build_vector_type (float_type_node, 2);
17715 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
17716 : "__vector long long",
17717 intDI_type_node, 2);
17718 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
17719 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17720 V4SI_type_node = rs6000_vector_type ("__vector signed int",
17721 intSI_type_node, 4);
17722 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
17723 V8HI_type_node = rs6000_vector_type ("__vector signed short",
17724 intHI_type_node, 8);
17725 V16QI_type_node = rs6000_vector_type ("__vector signed char",
17726 intQI_type_node, 16);
17728 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
17729 unsigned_intQI_type_node, 16);
17730 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
17731 unsigned_intHI_type_node, 8);
17732 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
17733 unsigned_intSI_type_node, 4);
17734 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17735 ? "__vector unsigned long"
17736 : "__vector unsigned long long",
17737 unsigned_intDI_type_node, 2);
17739 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17740 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17741 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17742 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17744 const_str_type_node
17745 = build_pointer_type (build_qualified_type (char_type_node,
17746 TYPE_QUAL_CONST));
17748 /* We use V1TI mode as a special container to hold __int128_t items that
17749 must live in VSX registers. */
17750 if (intTI_type_node)
17752 V1TI_type_node = rs6000_vector_type ("__vector __int128",
17753 intTI_type_node, 1);
17754 unsigned_V1TI_type_node
17755 = rs6000_vector_type ("__vector unsigned __int128",
17756 unsigned_intTI_type_node, 1);
17759 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17760 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17761 'vector unsigned short'. */
17763 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17764 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17765 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17766 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17767 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17769 long_integer_type_internal_node = long_integer_type_node;
17770 long_unsigned_type_internal_node = long_unsigned_type_node;
17771 long_long_integer_type_internal_node = long_long_integer_type_node;
17772 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17773 intQI_type_internal_node = intQI_type_node;
17774 uintQI_type_internal_node = unsigned_intQI_type_node;
17775 intHI_type_internal_node = intHI_type_node;
17776 uintHI_type_internal_node = unsigned_intHI_type_node;
17777 intSI_type_internal_node = intSI_type_node;
17778 uintSI_type_internal_node = unsigned_intSI_type_node;
17779 intDI_type_internal_node = intDI_type_node;
17780 uintDI_type_internal_node = unsigned_intDI_type_node;
17781 intTI_type_internal_node = intTI_type_node;
17782 uintTI_type_internal_node = unsigned_intTI_type_node;
17783 float_type_internal_node = float_type_node;
17784 double_type_internal_node = double_type_node;
17785 long_double_type_internal_node = long_double_type_node;
17786 dfloat64_type_internal_node = dfloat64_type_node;
17787 dfloat128_type_internal_node = dfloat128_type_node;
17788 void_type_internal_node = void_type_node;
17790 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17791 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17792 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17793 format that uses a pair of doubles, depending on the switches and
17794 defaults.
17796 We do not enable the actual __float128 keyword unless the user explicitly
17797 asks for it, because the library support is not yet complete.
17799 If we don't support either 128-bit IBM double double or IEEE 128-bit
17800 floating point, we need to make sure the type is non-zero, or else the
17801 self-test fails during bootstrap.
17803 We don't register a built-in type for __ibm128 if the type is the same as
17804 long double. Instead we add a #define for __ibm128 in
17805 rs6000_cpu_cpp_builtins to long double. */
17806 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17808 ibm128_float_type_node = make_node (REAL_TYPE);
17809 TYPE_PRECISION (ibm128_float_type_node) = 128;
17810 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17811 layout_type (ibm128_float_type_node);
17813 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17814 "__ibm128");
17816 else
17817 ibm128_float_type_node = long_double_type_node;
17819 if (TARGET_FLOAT128_KEYWORD)
17821 ieee128_float_type_node = float128_type_node;
17822 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17823 "__float128");
17826 else if (TARGET_FLOAT128_TYPE)
17828 ieee128_float_type_node = make_node (REAL_TYPE);
17829 TYPE_PRECISION (ieee128_float_type_node) = 128;
17830 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17831 layout_type (ieee128_float_type_node);
17833 /* If we are not exporting the __float128/_Float128 keywords, we need a
17834 keyword to get the types created. Use __ieee128 as the dummy
17835 keyword. */
17836 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17837 "__ieee128");
17840 else
17841 ieee128_float_type_node = long_double_type_node;
17843 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17844 tree type node. */
17845 builtin_mode_to_type[QImode][0] = integer_type_node;
17846 builtin_mode_to_type[HImode][0] = integer_type_node;
17847 builtin_mode_to_type[SImode][0] = intSI_type_node;
17848 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17849 builtin_mode_to_type[DImode][0] = intDI_type_node;
17850 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17851 builtin_mode_to_type[TImode][0] = intTI_type_node;
17852 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17853 builtin_mode_to_type[SFmode][0] = float_type_node;
17854 builtin_mode_to_type[DFmode][0] = double_type_node;
17855 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17856 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17857 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17858 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17859 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17860 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17861 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17862 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17863 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17864 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17865 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17866 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17867 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17868 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17869 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17870 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17871 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17872 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17873 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17874 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
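/* builtin_function_type below consults this table; for example, looking up
   (V4SImode, unsigned) yields unsigned_V4SI_type_node, while a mode/sign
   pair with no entry makes builtin_function_type report a fatal error.  */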
17876 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17877 TYPE_NAME (bool_char_type_node) = tdecl;
17879 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17880 TYPE_NAME (bool_short_type_node) = tdecl;
17882 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17883 TYPE_NAME (bool_int_type_node) = tdecl;
17885 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17886 TYPE_NAME (pixel_type_node) = tdecl;
17888 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17889 bool_char_type_node, 16);
17890 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17891 bool_short_type_node, 8);
17892 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17893 bool_int_type_node, 4);
17894 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17895 ? "__vector __bool long"
17896 : "__vector __bool long long",
17897 bool_long_type_node, 2);
17898 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17899 pixel_type_node, 8);
17901 /* Paired and SPE builtins are only available if you build a compiler with
17902 the appropriate options, so only create those builtins with the
17903 appropriate compiler option. Create Altivec and VSX builtins on machines
17904 with at least the general purpose extensions (970 and newer) to allow the
17905 use of the target attribute. */
17906 if (TARGET_PAIRED_FLOAT)
17907 paired_init_builtins ();
17908 if (TARGET_SPE)
17909 spe_init_builtins ();
17910 if (TARGET_EXTRA_BUILTINS)
17911 altivec_init_builtins ();
17912 if (TARGET_HTM)
17913 htm_init_builtins ();
17915 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17916 rs6000_common_init_builtins ();
17918 ftype = build_function_type_list (ieee128_float_type_node,
17919 const_str_type_node, NULL_TREE);
17920 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17921 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17923 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17924 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17925 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
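/* With the keyword enabled these are usable directly from user code, e.g.:

       __float128 inf = __builtin_infq ();
       __float128 nan = __builtin_nanq ("");

   matching the () -> __float128 and (const char *) -> __float128
   signatures built above.  */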
17927 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17928 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17929 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17931 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17932 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17933 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17935 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17936 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17937 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17939 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17940 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17941 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
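/* These expose the reciprocal-estimate paths; in user code, e.g.:

       double q = __builtin_recipdiv (a, b);   approximates a / b
       double r = __builtin_rsqrt (x);         approximates 1.0 / sqrt (x)

   with __builtin_recipdivf and __builtin_rsqrtf as the SFmode variants.  */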
17943 mode = (TARGET_64BIT) ? DImode : SImode;
17944 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17945 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17946 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17948 ftype = build_function_type_list (unsigned_intDI_type_node,
17949 NULL_TREE);
17950 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17952 if (TARGET_64BIT)
17953 ftype = build_function_type_list (unsigned_intDI_type_node,
17954 NULL_TREE);
17955 else
17956 ftype = build_function_type_list (unsigned_intSI_type_node,
17957 NULL_TREE);
17958 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
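/* __builtin_ppc_get_timebase always returns the full 64-bit time base:

       unsigned long long tb = __builtin_ppc_get_timebase ();

   while __builtin_ppc_mftb returns a GPR-sized value, i.e. only the low
   32 bits on a 32-bit target.  */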
17960 ftype = build_function_type_list (double_type_node, NULL_TREE);
17961 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17963 ftype = build_function_type_list (void_type_node,
17964 intSI_type_node, double_type_node,
17965 NULL_TREE);
17966 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17968 ftype = build_function_type_list (void_type_node, NULL_TREE);
17969 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17971 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17972 NULL_TREE);
17973 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17974 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
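/* Typical use, as documented for these builtins (user code):

       __builtin_cpu_init ();
       if (__builtin_cpu_is ("power8") || __builtin_cpu_supports ("vsx"))
         use_fast_path ();

   where the const_ptr_type_node parameter carries the CPU or feature
   name string.  */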
17976 /* AIX libm provides clog as __clog. */
17977 if (TARGET_XCOFF
17978 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17979 set_user_assembler_name (tdecl, "__clog");
17981 #ifdef SUBTARGET_INIT_BUILTINS
17982 SUBTARGET_INIT_BUILTINS;
17983 #endif
17986 /* Returns the rs6000 builtin decl for CODE. */
17988 static tree
17989 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17991 HOST_WIDE_INT fnmask;
17993 if (code >= RS6000_BUILTIN_COUNT)
17994 return error_mark_node;
17996 fnmask = rs6000_builtin_info[code].mask;
17997 if ((fnmask & rs6000_builtin_mask) != fnmask)
17999 rs6000_invalid_builtin ((enum rs6000_builtins)code);
18000 return error_mark_node;
18003 return rs6000_builtin_decls[code];
18006 static void
18007 spe_init_builtins (void)
18009 tree puint_type_node = build_pointer_type (unsigned_type_node);
18010 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
18011 const struct builtin_description *d;
18012 size_t i;
18013 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18015 tree v2si_ftype_4_v2si
18016 = build_function_type_list (opaque_V2SI_type_node,
18017 opaque_V2SI_type_node,
18018 opaque_V2SI_type_node,
18019 opaque_V2SI_type_node,
18020 opaque_V2SI_type_node,
18021 NULL_TREE);
18023 tree v2sf_ftype_4_v2sf
18024 = build_function_type_list (opaque_V2SF_type_node,
18025 opaque_V2SF_type_node,
18026 opaque_V2SF_type_node,
18027 opaque_V2SF_type_node,
18028 opaque_V2SF_type_node,
18029 NULL_TREE);
18031 tree int_ftype_int_v2si_v2si
18032 = build_function_type_list (integer_type_node,
18033 integer_type_node,
18034 opaque_V2SI_type_node,
18035 opaque_V2SI_type_node,
18036 NULL_TREE);
18038 tree int_ftype_int_v2sf_v2sf
18039 = build_function_type_list (integer_type_node,
18040 integer_type_node,
18041 opaque_V2SF_type_node,
18042 opaque_V2SF_type_node,
18043 NULL_TREE);
18045 tree void_ftype_v2si_puint_int
18046 = build_function_type_list (void_type_node,
18047 opaque_V2SI_type_node,
18048 puint_type_node,
18049 integer_type_node,
18050 NULL_TREE);
18052 tree void_ftype_v2si_puint_char
18053 = build_function_type_list (void_type_node,
18054 opaque_V2SI_type_node,
18055 puint_type_node,
18056 char_type_node,
18057 NULL_TREE);
18059 tree void_ftype_v2si_pv2si_int
18060 = build_function_type_list (void_type_node,
18061 opaque_V2SI_type_node,
18062 opaque_p_V2SI_type_node,
18063 integer_type_node,
18064 NULL_TREE);
18066 tree void_ftype_v2si_pv2si_char
18067 = build_function_type_list (void_type_node,
18068 opaque_V2SI_type_node,
18069 opaque_p_V2SI_type_node,
18070 char_type_node,
18071 NULL_TREE);
18073 tree void_ftype_int
18074 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18076 tree int_ftype_void
18077 = build_function_type_list (integer_type_node, NULL_TREE);
18079 tree v2si_ftype_pv2si_int
18080 = build_function_type_list (opaque_V2SI_type_node,
18081 opaque_p_V2SI_type_node,
18082 integer_type_node,
18083 NULL_TREE);
18085 tree v2si_ftype_puint_int
18086 = build_function_type_list (opaque_V2SI_type_node,
18087 puint_type_node,
18088 integer_type_node,
18089 NULL_TREE);
18091 tree v2si_ftype_pushort_int
18092 = build_function_type_list (opaque_V2SI_type_node,
18093 pushort_type_node,
18094 integer_type_node,
18095 NULL_TREE);
18097 tree v2si_ftype_signed_char
18098 = build_function_type_list (opaque_V2SI_type_node,
18099 signed_char_type_node,
18100 NULL_TREE);
18102 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
18104 /* Initialize irregular SPE builtins. */
18106 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
18107 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
18108 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
18109 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
18110 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
18111 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
18112 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
18113 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
18114 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
18115 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
18116 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
18117 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
18118 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
18119 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
18120 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
18121 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
18122 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
18123 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
18125 /* Loads. */
18126 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
18127 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
18128 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
18129 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
18130 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
18131 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
18132 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
18133 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
18134 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
18135 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
18136 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
18137 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
18138 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
18139 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
18140 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
18141 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
18142 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
18143 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
18144 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
18145 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
18146 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
18147 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
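/* E.g. a 64-bit SPE load through the opaque vector type (user code):

       __ev64_opaque__
       load_first (__ev64_opaque__ *p)
       {
         return __builtin_spe_evldd (p, 0);
       }

   using the v2si_ftype_pv2si_int signature built above.  */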
18149 /* Predicates. */
18150 d = bdesc_spe_predicates;
18151 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
18153 tree type;
18154 HOST_WIDE_INT mask = d->mask;
18156 if ((mask & builtin_mask) != mask)
18158 if (TARGET_DEBUG_BUILTIN)
18159 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
18160 d->name);
18161 continue;
18164 /* Cannot define builtin if the instruction is disabled. */
18165 gcc_assert (d->icode != CODE_FOR_nothing);
18166 switch (insn_data[d->icode].operand[1].mode)
18168 case E_V2SImode:
18169 type = int_ftype_int_v2si_v2si;
18170 break;
18171 case E_V2SFmode:
18172 type = int_ftype_int_v2sf_v2sf;
18173 break;
18174 default:
18175 gcc_unreachable ();
18178 def_builtin (d->name, type, d->code);
18181 /* Evsel predicates. */
18182 d = bdesc_spe_evsel;
18183 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
18185 tree type;
18186 HOST_WIDE_INT mask = d->mask;
18188 if ((mask & builtin_mask) != mask)
18190 if (TARGET_DEBUG_BUILTIN)
18191 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
18192 d->name);
18193 continue;
18196 /* Cannot define builtin if the instruction is disabled. */
18197 gcc_assert (d->icode != CODE_FOR_nothing);
18198 switch (insn_data[d->icode].operand[1].mode)
18200 case E_V2SImode:
18201 type = v2si_ftype_4_v2si;
18202 break;
18203 case E_V2SFmode:
18204 type = v2sf_ftype_4_v2sf;
18205 break;
18206 default:
18207 gcc_unreachable ();
18210 def_builtin (d->name, type, d->code);
18214 static void
18215 paired_init_builtins (void)
18217 const struct builtin_description *d;
18218 size_t i;
18219 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18221 tree int_ftype_int_v2sf_v2sf
18222 = build_function_type_list (integer_type_node,
18223 integer_type_node,
18224 V2SF_type_node,
18225 V2SF_type_node,
18226 NULL_TREE);
18227 tree pcfloat_type_node =
18228 build_pointer_type (build_qualified_type
18229 (float_type_node, TYPE_QUAL_CONST));
18231 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
18232 long_integer_type_node,
18233 pcfloat_type_node,
18234 NULL_TREE);
18235 tree void_ftype_v2sf_long_pcfloat =
18236 build_function_type_list (void_type_node,
18237 V2SF_type_node,
18238 long_integer_type_node,
18239 pcfloat_type_node,
18240 NULL_TREE);
18243 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
18244 PAIRED_BUILTIN_LX);
18247 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
18248 PAIRED_BUILTIN_STX);
18250 /* Predicates. */
18251 d = bdesc_paired_preds;
18252 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
18254 tree type;
18255 HOST_WIDE_INT mask = d->mask;
18257 if ((mask & builtin_mask) != mask)
18259 if (TARGET_DEBUG_BUILTIN)
18260 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
18261 d->name);
18262 continue;
18265 /* Cannot define builtin if the instruction is disabled. */
18266 gcc_assert (d->icode != CODE_FOR_nothing);
18268 if (TARGET_DEBUG_BUILTIN)
18269 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
18270 (int)i, get_insn_name (d->icode), (int)d->icode,
18271 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
18273 switch (insn_data[d->icode].operand[1].mode)
18275 case E_V2SFmode:
18276 type = int_ftype_int_v2sf_v2sf;
18277 break;
18278 default:
18279 gcc_unreachable ();
18282 def_builtin (d->name, type, d->code);
18286 static void
18287 altivec_init_builtins (void)
18289 const struct builtin_description *d;
18290 size_t i;
18291 tree ftype;
18292 tree decl;
18293 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18295 tree pvoid_type_node = build_pointer_type (void_type_node);
18297 tree pcvoid_type_node
18298 = build_pointer_type (build_qualified_type (void_type_node,
18299 TYPE_QUAL_CONST));
18301 tree int_ftype_opaque
18302 = build_function_type_list (integer_type_node,
18303 opaque_V4SI_type_node, NULL_TREE);
18304 tree opaque_ftype_opaque
18305 = build_function_type_list (integer_type_node, NULL_TREE);
18306 tree opaque_ftype_opaque_int
18307 = build_function_type_list (opaque_V4SI_type_node,
18308 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
18309 tree opaque_ftype_opaque_opaque_int
18310 = build_function_type_list (opaque_V4SI_type_node,
18311 opaque_V4SI_type_node, opaque_V4SI_type_node,
18312 integer_type_node, NULL_TREE);
18313 tree opaque_ftype_opaque_opaque_opaque
18314 = build_function_type_list (opaque_V4SI_type_node,
18315 opaque_V4SI_type_node, opaque_V4SI_type_node,
18316 opaque_V4SI_type_node, NULL_TREE);
18317 tree opaque_ftype_opaque_opaque
18318 = build_function_type_list (opaque_V4SI_type_node,
18319 opaque_V4SI_type_node, opaque_V4SI_type_node,
18320 NULL_TREE);
18321 tree int_ftype_int_opaque_opaque
18322 = build_function_type_list (integer_type_node,
18323 integer_type_node, opaque_V4SI_type_node,
18324 opaque_V4SI_type_node, NULL_TREE);
18325 tree int_ftype_int_v4si_v4si
18326 = build_function_type_list (integer_type_node,
18327 integer_type_node, V4SI_type_node,
18328 V4SI_type_node, NULL_TREE);
18329 tree int_ftype_int_v2di_v2di
18330 = build_function_type_list (integer_type_node,
18331 integer_type_node, V2DI_type_node,
18332 V2DI_type_node, NULL_TREE);
18333 tree void_ftype_v4si
18334 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
18335 tree v8hi_ftype_void
18336 = build_function_type_list (V8HI_type_node, NULL_TREE);
18337 tree void_ftype_void
18338 = build_function_type_list (void_type_node, NULL_TREE);
18339 tree void_ftype_int
18340 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18342 tree opaque_ftype_long_pcvoid
18343 = build_function_type_list (opaque_V4SI_type_node,
18344 long_integer_type_node, pcvoid_type_node,
18345 NULL_TREE);
18346 tree v16qi_ftype_long_pcvoid
18347 = build_function_type_list (V16QI_type_node,
18348 long_integer_type_node, pcvoid_type_node,
18349 NULL_TREE);
18350 tree v8hi_ftype_long_pcvoid
18351 = build_function_type_list (V8HI_type_node,
18352 long_integer_type_node, pcvoid_type_node,
18353 NULL_TREE);
18354 tree v4si_ftype_long_pcvoid
18355 = build_function_type_list (V4SI_type_node,
18356 long_integer_type_node, pcvoid_type_node,
18357 NULL_TREE);
18358 tree v4sf_ftype_long_pcvoid
18359 = build_function_type_list (V4SF_type_node,
18360 long_integer_type_node, pcvoid_type_node,
18361 NULL_TREE);
18362 tree v2df_ftype_long_pcvoid
18363 = build_function_type_list (V2DF_type_node,
18364 long_integer_type_node, pcvoid_type_node,
18365 NULL_TREE);
18366 tree v2di_ftype_long_pcvoid
18367 = build_function_type_list (V2DI_type_node,
18368 long_integer_type_node, pcvoid_type_node,
18369 NULL_TREE);
18371 tree void_ftype_opaque_long_pvoid
18372 = build_function_type_list (void_type_node,
18373 opaque_V4SI_type_node, long_integer_type_node,
18374 pvoid_type_node, NULL_TREE);
18375 tree void_ftype_v4si_long_pvoid
18376 = build_function_type_list (void_type_node,
18377 V4SI_type_node, long_integer_type_node,
18378 pvoid_type_node, NULL_TREE);
18379 tree void_ftype_v16qi_long_pvoid
18380 = build_function_type_list (void_type_node,
18381 V16QI_type_node, long_integer_type_node,
18382 pvoid_type_node, NULL_TREE);
18384 tree void_ftype_v16qi_pvoid_long
18385 = build_function_type_list (void_type_node,
18386 V16QI_type_node, pvoid_type_node,
18387 long_integer_type_node, NULL_TREE);
18389 tree void_ftype_v8hi_long_pvoid
18390 = build_function_type_list (void_type_node,
18391 V8HI_type_node, long_integer_type_node,
18392 pvoid_type_node, NULL_TREE);
18393 tree void_ftype_v4sf_long_pvoid
18394 = build_function_type_list (void_type_node,
18395 V4SF_type_node, long_integer_type_node,
18396 pvoid_type_node, NULL_TREE);
18397 tree void_ftype_v2df_long_pvoid
18398 = build_function_type_list (void_type_node,
18399 V2DF_type_node, long_integer_type_node,
18400 pvoid_type_node, NULL_TREE);
18401 tree void_ftype_v2di_long_pvoid
18402 = build_function_type_list (void_type_node,
18403 V2DI_type_node, long_integer_type_node,
18404 pvoid_type_node, NULL_TREE);
18405 tree int_ftype_int_v8hi_v8hi
18406 = build_function_type_list (integer_type_node,
18407 integer_type_node, V8HI_type_node,
18408 V8HI_type_node, NULL_TREE);
18409 tree int_ftype_int_v16qi_v16qi
18410 = build_function_type_list (integer_type_node,
18411 integer_type_node, V16QI_type_node,
18412 V16QI_type_node, NULL_TREE);
18413 tree int_ftype_int_v4sf_v4sf
18414 = build_function_type_list (integer_type_node,
18415 integer_type_node, V4SF_type_node,
18416 V4SF_type_node, NULL_TREE);
18417 tree int_ftype_int_v2df_v2df
18418 = build_function_type_list (integer_type_node,
18419 integer_type_node, V2DF_type_node,
18420 V2DF_type_node, NULL_TREE);
18421 tree v2di_ftype_v2di
18422 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18423 tree v4si_ftype_v4si
18424 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18425 tree v8hi_ftype_v8hi
18426 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18427 tree v16qi_ftype_v16qi
18428 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18429 tree v4sf_ftype_v4sf
18430 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18431 tree v2df_ftype_v2df
18432 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18433 tree void_ftype_pcvoid_int_int
18434 = build_function_type_list (void_type_node,
18435 pcvoid_type_node, integer_type_node,
18436 integer_type_node, NULL_TREE);
18438 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18439 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18440 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18441 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18442 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18443 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18444 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18445 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18446 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18447 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18448 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18449 ALTIVEC_BUILTIN_LVXL_V2DF);
18450 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18451 ALTIVEC_BUILTIN_LVXL_V2DI);
18452 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18453 ALTIVEC_BUILTIN_LVXL_V4SF);
18454 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18455 ALTIVEC_BUILTIN_LVXL_V4SI);
18456 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18457 ALTIVEC_BUILTIN_LVXL_V8HI);
18458 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18459 ALTIVEC_BUILTIN_LVXL_V16QI);
18460 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18461 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18462 ALTIVEC_BUILTIN_LVX_V2DF);
18463 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18464 ALTIVEC_BUILTIN_LVX_V2DI);
18465 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18466 ALTIVEC_BUILTIN_LVX_V4SF);
18467 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18468 ALTIVEC_BUILTIN_LVX_V4SI);
18469 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18470 ALTIVEC_BUILTIN_LVX_V8HI);
18471 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18472 ALTIVEC_BUILTIN_LVX_V16QI);
18473 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18474 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18475 ALTIVEC_BUILTIN_STVX_V2DF);
18476 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18477 ALTIVEC_BUILTIN_STVX_V2DI);
18478 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18479 ALTIVEC_BUILTIN_STVX_V4SF);
18480 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18481 ALTIVEC_BUILTIN_STVX_V4SI);
18482 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18483 ALTIVEC_BUILTIN_STVX_V8HI);
18484 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18485 ALTIVEC_BUILTIN_STVX_V16QI);
18486 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18487 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18488 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18489 ALTIVEC_BUILTIN_STVXL_V2DF);
18490 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18491 ALTIVEC_BUILTIN_STVXL_V2DI);
18492 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18493 ALTIVEC_BUILTIN_STVXL_V4SF);
18494 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18495 ALTIVEC_BUILTIN_STVXL_V4SI);
18496 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18497 ALTIVEC_BUILTIN_STVXL_V8HI);
18498 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18499 ALTIVEC_BUILTIN_STVXL_V16QI);
18500 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18501 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18502 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18503 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18504 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18505 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18506 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18507 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18508 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18509 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18510 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18511 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18512 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18513 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18514 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18515 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
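/* The __builtin_vec_* entries above back the <altivec.h> overloads;
   e.g. vec_ld (0, p) is defined there as __builtin_vec_ld (0, p) and is
   later resolved to a type-specific load by the overload machinery.  */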
18517 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18518 VSX_BUILTIN_LXVD2X_V2DF);
18519 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18520 VSX_BUILTIN_LXVD2X_V2DI);
18521 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18522 VSX_BUILTIN_LXVW4X_V4SF);
18523 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18524 VSX_BUILTIN_LXVW4X_V4SI);
18525 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18526 VSX_BUILTIN_LXVW4X_V8HI);
18527 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18528 VSX_BUILTIN_LXVW4X_V16QI);
18529 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18530 VSX_BUILTIN_STXVD2X_V2DF);
18531 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18532 VSX_BUILTIN_STXVD2X_V2DI);
18533 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18534 VSX_BUILTIN_STXVW4X_V4SF);
18535 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18536 VSX_BUILTIN_STXVW4X_V4SI);
18537 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18538 VSX_BUILTIN_STXVW4X_V8HI);
18539 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18540 VSX_BUILTIN_STXVW4X_V16QI);
18542 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18543 VSX_BUILTIN_LD_ELEMREV_V2DF);
18544 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18545 VSX_BUILTIN_LD_ELEMREV_V2DI);
18546 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18547 VSX_BUILTIN_LD_ELEMREV_V4SF);
18548 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18549 VSX_BUILTIN_LD_ELEMREV_V4SI);
18550 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18551 VSX_BUILTIN_ST_ELEMREV_V2DF);
18552 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18553 VSX_BUILTIN_ST_ELEMREV_V2DI);
18554 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18555 VSX_BUILTIN_ST_ELEMREV_V4SF);
18556 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18557 VSX_BUILTIN_ST_ELEMREV_V4SI);
18559 if (TARGET_P9_VECTOR)
18561 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18562 VSX_BUILTIN_LD_ELEMREV_V8HI);
18563 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18564 VSX_BUILTIN_LD_ELEMREV_V16QI);
18565 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18566 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18567 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18568 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18570 else
18572 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
18573 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
18574 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
18575 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
18576 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
18577 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
18578 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
18579 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
18582 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18583 VSX_BUILTIN_VEC_LD);
18584 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18585 VSX_BUILTIN_VEC_ST);
18586 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18587 VSX_BUILTIN_VEC_XL);
18588 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18589 VSX_BUILTIN_VEC_XST);
18591 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18592 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18593 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18595 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18596 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18597 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18598 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18599 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18600 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18601 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18602 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18603 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18604 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18605 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18606 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18608 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18609 ALTIVEC_BUILTIN_VEC_ADDE);
18610 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18611 ALTIVEC_BUILTIN_VEC_ADDEC);
18612 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18613 ALTIVEC_BUILTIN_VEC_CMPNE);
18614 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18615 ALTIVEC_BUILTIN_VEC_MUL);
18617 /* Cell builtins. */
18618 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18619 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18620 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18621 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18623 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18624 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18625 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18626 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18628 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18629 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18630 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18631 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18633 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18634 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18635 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18636 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18638 if (TARGET_P9_VECTOR)
18639 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18640 P9V_BUILTIN_STXVL);
18642 /* Add the DST variants. */
18643 d = bdesc_dst;
18644 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18646 HOST_WIDE_INT mask = d->mask;
18648 /* It is expected that these dst built-in functions may have
18649 d->icode equal to CODE_FOR_nothing. */
18650 if ((mask & builtin_mask) != mask)
18652 if (TARGET_DEBUG_BUILTIN)
18653 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18654 d->name);
18655 continue;
18657 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18660 /* Initialize the predicates. */
18661 d = bdesc_altivec_preds;
18662 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18664 machine_mode mode1;
18665 tree type;
18666 HOST_WIDE_INT mask = d->mask;
18668 if ((mask & builtin_mask) != mask)
18670 if (TARGET_DEBUG_BUILTIN)
18671 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18672 d->name);
18673 continue;
18676 if (rs6000_overloaded_builtin_p (d->code))
18677 mode1 = VOIDmode;
18678 else
18680 /* Cannot define builtin if the instruction is disabled. */
18681 gcc_assert (d->icode != CODE_FOR_nothing);
18682 mode1 = insn_data[d->icode].operand[1].mode;
18685 switch (mode1)
18687 case E_VOIDmode:
18688 type = int_ftype_int_opaque_opaque;
18689 break;
18690 case E_V2DImode:
18691 type = int_ftype_int_v2di_v2di;
18692 break;
18693 case E_V4SImode:
18694 type = int_ftype_int_v4si_v4si;
18695 break;
18696 case E_V8HImode:
18697 type = int_ftype_int_v8hi_v8hi;
18698 break;
18699 case E_V16QImode:
18700 type = int_ftype_int_v16qi_v16qi;
18701 break;
18702 case E_V4SFmode:
18703 type = int_ftype_int_v4sf_v4sf;
18704 break;
18705 case E_V2DFmode:
18706 type = int_ftype_int_v2df_v2df;
18707 break;
18708 default:
18709 gcc_unreachable ();
18712 def_builtin (d->name, type, d->code);
18715 /* Initialize the abs* operators. */
18716 d = bdesc_abs;
18717 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18719 machine_mode mode0;
18720 tree type;
18721 HOST_WIDE_INT mask = d->mask;
18723 if ((mask & builtin_mask) != mask)
18725 if (TARGET_DEBUG_BUILTIN)
18726 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18727 d->name);
18728 continue;
18731 /* Cannot define builtin if the instruction is disabled. */
18732 gcc_assert (d->icode != CODE_FOR_nothing);
18733 mode0 = insn_data[d->icode].operand[0].mode;
18735 switch (mode0)
18737 case E_V2DImode:
18738 type = v2di_ftype_v2di;
18739 break;
18740 case E_V4SImode:
18741 type = v4si_ftype_v4si;
18742 break;
18743 case E_V8HImode:
18744 type = v8hi_ftype_v8hi;
18745 break;
18746 case E_V16QImode:
18747 type = v16qi_ftype_v16qi;
18748 break;
18749 case E_V4SFmode:
18750 type = v4sf_ftype_v4sf;
18751 break;
18752 case E_V2DFmode:
18753 type = v2df_ftype_v2df;
18754 break;
18755 default:
18756 gcc_unreachable ();
18759 def_builtin (d->name, type, d->code);
18762 /* Initialize the target builtin that implements
18763 targetm.vectorize.builtin_mask_for_load. */
18765 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18766 v16qi_ftype_long_pcvoid,
18767 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18768 BUILT_IN_MD, NULL, NULL_TREE);
18769 TREE_READONLY (decl) = 1;
18770 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18771 altivec_builtin_mask_for_load = decl;
18773 /* Access to the vec_init patterns. */
18774 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18775 integer_type_node, integer_type_node,
18776 integer_type_node, NULL_TREE);
18777 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18779 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18780 short_integer_type_node,
18781 short_integer_type_node,
18782 short_integer_type_node,
18783 short_integer_type_node,
18784 short_integer_type_node,
18785 short_integer_type_node,
18786 short_integer_type_node, NULL_TREE);
18787 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18789 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18790 char_type_node, char_type_node,
18791 char_type_node, char_type_node,
18792 char_type_node, char_type_node,
18793 char_type_node, char_type_node,
18794 char_type_node, char_type_node,
18795 char_type_node, char_type_node,
18796 char_type_node, char_type_node,
18797 char_type_node, NULL_TREE);
18798 def_builtin ("__builtin_vec_init_v16qi", ftype,
18799 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18801 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18802 float_type_node, float_type_node,
18803 float_type_node, NULL_TREE);
18804 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18806 /* VSX builtins. */
18807 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18808 double_type_node, NULL_TREE);
18809 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18811 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18812 intDI_type_node, NULL_TREE);
18813 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
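/* E.g. __builtin_vec_init_v4si (1, 2, 3, 4) builds a V4SI vector from
   four scalar ints via the vec_init expander; the v2df/v2di variants
   take two elements each.  */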
18815 /* Access to the vec_set patterns. */
18816 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18817 intSI_type_node,
18818 integer_type_node, NULL_TREE);
18819 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18821 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18822 intHI_type_node,
18823 integer_type_node, NULL_TREE);
18824 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18826 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18827 intQI_type_node,
18828 integer_type_node, NULL_TREE);
18829 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18831 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18832 float_type_node,
18833 integer_type_node, NULL_TREE);
18834 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18836 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18837 double_type_node,
18838 integer_type_node, NULL_TREE);
18839 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18841 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18842 intDI_type_node,
18843 integer_type_node, NULL_TREE);
18844 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
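/* E.g. __builtin_vec_set_v4si (v, 99, 3) returns v with element 3
   replaced by 99; the trailing integer_type_node argument is the
   element index.  */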
18846 /* Access to the vec_extract patterns. */
18847 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18848 integer_type_node, NULL_TREE);
18849 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18851 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18852 integer_type_node, NULL_TREE);
18853 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18855 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18856 integer_type_node, NULL_TREE);
18857 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18859 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18860 integer_type_node, NULL_TREE);
18861 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18863 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18864 integer_type_node, NULL_TREE);
18865 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18867 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18868 integer_type_node, NULL_TREE);
18869 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
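/* E.g. int e = __builtin_vec_ext_v4si (v, 0); extracts element 0,
   matching the (vector, index) -> scalar shape of each ftype above.  */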
18872 if (V1TI_type_node)
18874 tree v1ti_ftype_long_pcvoid
18875 = build_function_type_list (V1TI_type_node,
18876 long_integer_type_node, pcvoid_type_node,
18877 NULL_TREE);
18878 tree void_ftype_v1ti_long_pvoid
18879 = build_function_type_list (void_type_node,
18880 V1TI_type_node, long_integer_type_node,
18881 pvoid_type_node, NULL_TREE);
18882 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18883 VSX_BUILTIN_LXVD2X_V1TI);
18884 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18885 VSX_BUILTIN_STXVD2X_V1TI);
18886 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18887 NULL_TREE);
18888 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18889 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18890 intTI_type_node,
18891 integer_type_node, NULL_TREE);
18892 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18893 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18894 integer_type_node, NULL_TREE);
18895 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18900 static void
18901 htm_init_builtins (void)
18903 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18904 const struct builtin_description *d;
18905 size_t i;
18907 d = bdesc_htm;
18908 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18910 tree op[MAX_HTM_OPERANDS], type;
18911 HOST_WIDE_INT mask = d->mask;
18912 unsigned attr = rs6000_builtin_info[d->code].attr;
18913 bool void_func = (attr & RS6000_BTC_VOID);
18914 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18915 int nopnds = 0;
18916 tree gpr_type_node;
18917 tree rettype;
18918 tree argtype;
18920 /* It is expected that these htm built-in functions may have
18921 d->icode equal to CODE_FOR_nothing. */
18923 if (TARGET_32BIT && TARGET_POWERPC64)
18924 gpr_type_node = long_long_unsigned_type_node;
18925 else
18926 gpr_type_node = long_unsigned_type_node;
18928 if (attr & RS6000_BTC_SPR)
18930 rettype = gpr_type_node;
18931 argtype = gpr_type_node;
18933 else if (d->code == HTM_BUILTIN_TABORTDC
18934 || d->code == HTM_BUILTIN_TABORTDCI)
18936 rettype = unsigned_type_node;
18937 argtype = gpr_type_node;
18939 else
18941 rettype = unsigned_type_node;
18942 argtype = unsigned_type_node;
18945 if ((mask & builtin_mask) != mask)
18947 if (TARGET_DEBUG_BUILTIN)
18948 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18949 continue;
18952 if (d->name == 0)
18954 if (TARGET_DEBUG_BUILTIN)
18955 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
18956 (long unsigned) i);
18957 continue;
18960 op[nopnds++] = (void_func) ? void_type_node : rettype;
18962 if (attr_args == RS6000_BTC_UNARY)
18963 op[nopnds++] = argtype;
18964 else if (attr_args == RS6000_BTC_BINARY)
18966 op[nopnds++] = argtype;
18967 op[nopnds++] = argtype;
18969 else if (attr_args == RS6000_BTC_TERNARY)
18971 op[nopnds++] = argtype;
18972 op[nopnds++] = argtype;
18973 op[nopnds++] = argtype;
18976 switch (nopnds)
18978 case 1:
18979 type = build_function_type_list (op[0], NULL_TREE);
18980 break;
18981 case 2:
18982 type = build_function_type_list (op[0], op[1], NULL_TREE);
18983 break;
18984 case 3:
18985 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18986 break;
18987 case 4:
18988 type = build_function_type_list (op[0], op[1], op[2], op[3],
18989 NULL_TREE);
18990 break;
18991 default:
18992 gcc_unreachable ();
18995 def_builtin (d->name, type, d->code);
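/* The bdesc_htm table (defined elsewhere) supplies names such as
   __builtin_tbegin and __builtin_tend, so transactional user code can
   look like:

       if (__builtin_tbegin (0))
         {
           counter++;
           __builtin_tend (0);
         }

   (counter being whatever transactional work applies).  */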
18999 /* Hash function for builtin functions with up to 3 arguments and a return
19000 type. */
19001 hashval_t
19002 builtin_hasher::hash (builtin_hash_struct *bh)
19004 unsigned ret = 0;
19005 int i;
19007 for (i = 0; i < 4; i++)
19009 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
19010 ret = (ret * 2) + bh->uns_p[i];
19013 return ret;
19016 /* Compare builtin hash entries H1 and H2 for equivalence. */
19017 bool
19018 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
19020 return ((p1->mode[0] == p2->mode[0])
19021 && (p1->mode[1] == p2->mode[1])
19022 && (p1->mode[2] == p2->mode[2])
19023 && (p1->mode[3] == p2->mode[3])
19024 && (p1->uns_p[0] == p2->uns_p[0])
19025 && (p1->uns_p[1] == p2->uns_p[1])
19026 && (p1->uns_p[2] == p2->uns_p[2])
19027 && (p1->uns_p[3] == p2->uns_p[3]));
19030 /* Map types for builtin functions with an explicit return type and up to 3
19031 arguments. Functions with fewer than 3 arguments use VOIDmode for the
19032 unused argument slots. */
19033 static tree
19034 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
19035 machine_mode mode_arg1, machine_mode mode_arg2,
19036 enum rs6000_builtins builtin, const char *name)
19038 struct builtin_hash_struct h;
19039 struct builtin_hash_struct *h2;
19040 int num_args = 3;
19041 int i;
19042 tree ret_type = NULL_TREE;
19043 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
19045 /* Create builtin_hash_table. */
19046 if (builtin_hash_table == NULL)
19047 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
19049 h.type = NULL_TREE;
19050 h.mode[0] = mode_ret;
19051 h.mode[1] = mode_arg0;
19052 h.mode[2] = mode_arg1;
19053 h.mode[3] = mode_arg2;
19054 h.uns_p[0] = 0;
19055 h.uns_p[1] = 0;
19056 h.uns_p[2] = 0;
19057 h.uns_p[3] = 0;
19059 /* If the builtin is a function that produces unsigned results or takes
19060 unsigned arguments, and it is returned as a decl to the vectorizer (such
19061 as widening multiplies, permute), make sure the arguments and return
19062 value are typed correctly. */
19063 switch (builtin)
19065 /* unsigned 1 argument functions. */
19066 case CRYPTO_BUILTIN_VSBOX:
19067 case P8V_BUILTIN_VGBBD:
19068 case MISC_BUILTIN_CDTBCD:
19069 case MISC_BUILTIN_CBCDTD:
19070 h.uns_p[0] = 1;
19071 h.uns_p[1] = 1;
19072 break;
19074 /* unsigned 2 argument functions. */
19075 case ALTIVEC_BUILTIN_VMULEUB:
19076 case ALTIVEC_BUILTIN_VMULEUH:
19077 case ALTIVEC_BUILTIN_VMULOUB:
19078 case ALTIVEC_BUILTIN_VMULOUH:
19079 case CRYPTO_BUILTIN_VCIPHER:
19080 case CRYPTO_BUILTIN_VCIPHERLAST:
19081 case CRYPTO_BUILTIN_VNCIPHER:
19082 case CRYPTO_BUILTIN_VNCIPHERLAST:
19083 case CRYPTO_BUILTIN_VPMSUMB:
19084 case CRYPTO_BUILTIN_VPMSUMH:
19085 case CRYPTO_BUILTIN_VPMSUMW:
19086 case CRYPTO_BUILTIN_VPMSUMD:
19087 case CRYPTO_BUILTIN_VPMSUM:
19088 case MISC_BUILTIN_ADDG6S:
19089 case MISC_BUILTIN_DIVWEU:
19090 case MISC_BUILTIN_DIVWEUO:
19091 case MISC_BUILTIN_DIVDEU:
19092 case MISC_BUILTIN_DIVDEUO:
19093 case VSX_BUILTIN_UDIV_V2DI:
19094 h.uns_p[0] = 1;
19095 h.uns_p[1] = 1;
19096 h.uns_p[2] = 1;
19097 break;
19099 /* unsigned 3 argument functions. */
19100 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
19101 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
19102 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
19103 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
19104 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
19105 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
19106 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
19107 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
19108 case VSX_BUILTIN_VPERM_16QI_UNS:
19109 case VSX_BUILTIN_VPERM_8HI_UNS:
19110 case VSX_BUILTIN_VPERM_4SI_UNS:
19111 case VSX_BUILTIN_VPERM_2DI_UNS:
19112 case VSX_BUILTIN_XXSEL_16QI_UNS:
19113 case VSX_BUILTIN_XXSEL_8HI_UNS:
19114 case VSX_BUILTIN_XXSEL_4SI_UNS:
19115 case VSX_BUILTIN_XXSEL_2DI_UNS:
19116 case CRYPTO_BUILTIN_VPERMXOR:
19117 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
19118 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
19119 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
19120 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
19121 case CRYPTO_BUILTIN_VSHASIGMAW:
19122 case CRYPTO_BUILTIN_VSHASIGMAD:
19123 case CRYPTO_BUILTIN_VSHASIGMA:
19124 h.uns_p[0] = 1;
19125 h.uns_p[1] = 1;
19126 h.uns_p[2] = 1;
19127 h.uns_p[3] = 1;
19128 break;
19130 /* signed permute functions with unsigned char mask. */
19131 case ALTIVEC_BUILTIN_VPERM_16QI:
19132 case ALTIVEC_BUILTIN_VPERM_8HI:
19133 case ALTIVEC_BUILTIN_VPERM_4SI:
19134 case ALTIVEC_BUILTIN_VPERM_4SF:
19135 case ALTIVEC_BUILTIN_VPERM_2DI:
19136 case ALTIVEC_BUILTIN_VPERM_2DF:
19137 case VSX_BUILTIN_VPERM_16QI:
19138 case VSX_BUILTIN_VPERM_8HI:
19139 case VSX_BUILTIN_VPERM_4SI:
19140 case VSX_BUILTIN_VPERM_4SF:
19141 case VSX_BUILTIN_VPERM_2DI:
19142 case VSX_BUILTIN_VPERM_2DF:
19143 h.uns_p[3] = 1;
19144 break;
19146 /* unsigned args, signed return. */
19147 case VSX_BUILTIN_XVCVUXDSP:
19148 case VSX_BUILTIN_XVCVUXDDP_UNS:
19149 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
19150 h.uns_p[1] = 1;
19151 break;
19153 /* signed args, unsigned return. */
19154 case VSX_BUILTIN_XVCVDPUXDS_UNS:
19155 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
19156 case MISC_BUILTIN_UNPACK_TD:
19157 case MISC_BUILTIN_UNPACK_V1TI:
19158 h.uns_p[0] = 1;
19159 break;
19161 /* unsigned arguments for 128-bit pack instructions. */
19162 case MISC_BUILTIN_PACK_TD:
19163 case MISC_BUILTIN_PACK_V1TI:
19164 h.uns_p[1] = 1;
19165 h.uns_p[2] = 1;
19166 break;
19168 default:
19169 break;
19172 /* Figure out how many args are present. */
19173 while (num_args > 0 && h.mode[num_args] == VOIDmode)
19174 num_args--;
19176 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
19177 if (!ret_type && h.uns_p[0])
19178 ret_type = builtin_mode_to_type[h.mode[0]][0];
19180 if (!ret_type)
19181 fatal_error (input_location,
19182 "internal error: builtin function %s had an unexpected "
19183 "return type %s", name, GET_MODE_NAME (h.mode[0]));
19185 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
19186 arg_type[i] = NULL_TREE;
19188 for (i = 0; i < num_args; i++)
19190 int m = (int) h.mode[i+1];
19191 int uns_p = h.uns_p[i+1];
19193 arg_type[i] = builtin_mode_to_type[m][uns_p];
19194 if (!arg_type[i] && uns_p)
19195 arg_type[i] = builtin_mode_to_type[m][0];
19197 if (!arg_type[i])
19198 fatal_error (input_location,
19199 "internal error: builtin function %s, argument %d "
19200 "had unexpected argument type %s", name, i,
19201 GET_MODE_NAME (m));
19204 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
19205 if (*found == NULL)
19207 h2 = ggc_alloc<builtin_hash_struct> ();
19208 *h2 = h;
19209 *found = h2;
19211 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
19212 arg_type[2], NULL_TREE);
19215 return (*found)->type;
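/* For example, a signed vector add over V4SImode resolves the return slot
   and both argument slots to V4SI_type_node, producing one shared
   "v4si f (v4si, v4si)" type; any later builtin with the same mode/sign
   shape receives the identical tree from the hash table.  */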
19218 static void
19219 rs6000_common_init_builtins (void)
19221 const struct builtin_description *d;
19222 size_t i;
19224 tree opaque_ftype_opaque = NULL_TREE;
19225 tree opaque_ftype_opaque_opaque = NULL_TREE;
19226 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
19227 tree v2si_ftype = NULL_TREE;
19228 tree v2si_ftype_qi = NULL_TREE;
19229 tree v2si_ftype_v2si_qi = NULL_TREE;
19230 tree v2si_ftype_int_qi = NULL_TREE;
19231 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
19233 if (!TARGET_PAIRED_FLOAT)
19235 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
19236 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
19239 /* Paired and SPE builtins are only available if you build a compiler with
19240 the appropriate options, so only create those builtins with the
19241 appropriate compiler option. Create Altivec and VSX builtins on machines
19242 with at least the general purpose extensions (970 and newer) to allow the
19243 use of the target attribute. */
19245 if (TARGET_EXTRA_BUILTINS)
19246 builtin_mask |= RS6000_BTM_COMMON;
19248 /* Add the ternary operators. */
19249 d = bdesc_3arg;
19250 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
19252 tree type;
19253 HOST_WIDE_INT mask = d->mask;
19255 if ((mask & builtin_mask) != mask)
19257 if (TARGET_DEBUG_BUILTIN)
19258 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
19259 continue;
19262 if (rs6000_overloaded_builtin_p (d->code))
19264 if (! (type = opaque_ftype_opaque_opaque_opaque))
19265 type = opaque_ftype_opaque_opaque_opaque
19266 = build_function_type_list (opaque_V4SI_type_node,
19267 opaque_V4SI_type_node,
19268 opaque_V4SI_type_node,
19269 opaque_V4SI_type_node,
19270 NULL_TREE);
19272 else
19274 enum insn_code icode = d->icode;
19275 if (d->name == 0)
19277 if (TARGET_DEBUG_BUILTIN)
19278 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
19279 (long unsigned)i);
19281 continue;
19284 if (icode == CODE_FOR_nothing)
19286 if (TARGET_DEBUG_BUILTIN)
19287 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
19288 d->name);
19290 continue;
19293 type = builtin_function_type (insn_data[icode].operand[0].mode,
19294 insn_data[icode].operand[1].mode,
19295 insn_data[icode].operand[2].mode,
19296 insn_data[icode].operand[3].mode,
19297 d->code, d->name);
19300 def_builtin (d->name, type, d->code);
19303 /* Add the binary operators. */
19304 d = bdesc_2arg;
19305 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19307 machine_mode mode0, mode1, mode2;
19308 tree type;
19309 HOST_WIDE_INT mask = d->mask;
19311 if ((mask & builtin_mask) != mask)
19313 if (TARGET_DEBUG_BUILTIN)
19314 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
19315 continue;
19318 if (rs6000_overloaded_builtin_p (d->code))
19320 if (! (type = opaque_ftype_opaque_opaque))
19321 type = opaque_ftype_opaque_opaque
19322 = build_function_type_list (opaque_V4SI_type_node,
19323 opaque_V4SI_type_node,
19324 opaque_V4SI_type_node,
19325 NULL_TREE);
19327 else
19329 enum insn_code icode = d->icode;
19330 if (d->name == 0)
19332 if (TARGET_DEBUG_BUILTIN)
19333 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
19334 (long unsigned)i);
19336 continue;
19339 if (icode == CODE_FOR_nothing)
19341 if (TARGET_DEBUG_BUILTIN)
19342 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
19343 d->name);
19345 continue;
19348 mode0 = insn_data[icode].operand[0].mode;
19349 mode1 = insn_data[icode].operand[1].mode;
19350 mode2 = insn_data[icode].operand[2].mode;
19352 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
19354 if (! (type = v2si_ftype_v2si_qi))
19355 type = v2si_ftype_v2si_qi
19356 = build_function_type_list (opaque_V2SI_type_node,
19357 opaque_V2SI_type_node,
19358 char_type_node,
19359 NULL_TREE);
19362 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
19363 && mode2 == QImode)
19365 if (! (type = v2si_ftype_int_qi))
19366 type = v2si_ftype_int_qi
19367 = build_function_type_list (opaque_V2SI_type_node,
19368 integer_type_node,
19369 char_type_node,
19370 NULL_TREE);
19373 else
19374 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
19375 d->code, d->name);
19378 def_builtin (d->name, type, d->code);
19381 /* Add the simple unary operators. */
19382 d = bdesc_1arg;
19383 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19385 machine_mode mode0, mode1;
19386 tree type;
19387 HOST_WIDE_INT mask = d->mask;
19389 if ((mask & builtin_mask) != mask)
19391 if (TARGET_DEBUG_BUILTIN)
19392 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
19393 continue;
19396 if (rs6000_overloaded_builtin_p (d->code))
19398 if (! (type = opaque_ftype_opaque))
19399 type = opaque_ftype_opaque
19400 = build_function_type_list (opaque_V4SI_type_node,
19401 opaque_V4SI_type_node,
19402 NULL_TREE);
19404 else
19406 enum insn_code icode = d->icode;
19407 if (d->name == 0)
19409 if (TARGET_DEBUG_BUILTIN)
19410 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
19411 (long unsigned) i);
19413 continue;
19416 if (icode == CODE_FOR_nothing)
19418 if (TARGET_DEBUG_BUILTIN)
19419 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19420 d->name);
19422 continue;
19425 mode0 = insn_data[icode].operand[0].mode;
19426 mode1 = insn_data[icode].operand[1].mode;
19428 if (mode0 == V2SImode && mode1 == QImode)
19430 if (! (type = v2si_ftype_qi))
19431 type = v2si_ftype_qi
19432 = build_function_type_list (opaque_V2SI_type_node,
19433 char_type_node,
19434 NULL_TREE);
19437 else
19438 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19439 d->code, d->name);
19442 def_builtin (d->name, type, d->code);
19445 /* Add the simple no-argument operators. */
19446 d = bdesc_0arg;
19447 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19449 machine_mode mode0;
19450 tree type;
19451 HOST_WIDE_INT mask = d->mask;
19453 if ((mask & builtin_mask) != mask)
19455 if (TARGET_DEBUG_BUILTIN)
19456 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19457 continue;
19459 if (rs6000_overloaded_builtin_p (d->code))
19461 if (!opaque_ftype_opaque)
19462 opaque_ftype_opaque
19463 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19464 type = opaque_ftype_opaque;
19466 else
19468 enum insn_code icode = d->icode;
19469 if (d->name == 0)
19471 if (TARGET_DEBUG_BUILTIN)
19472 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19473 (long unsigned) i);
19474 continue;
19476 if (icode == CODE_FOR_nothing)
19478 if (TARGET_DEBUG_BUILTIN)
19479 fprintf (stderr,
19480 "rs6000_builtin, skip no-argument %s (no code)\n",
19481 d->name);
19482 continue;
19484 mode0 = insn_data[icode].operand[0].mode;
19485 if (mode0 == V2SImode)
19487 /* Code for SPE. */
19488 if (! (type = v2si_ftype))
19490 v2si_ftype
19491 = build_function_type_list (opaque_V2SI_type_node,
19492 NULL_TREE);
19493 type = v2si_ftype;
19496 else
19497 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19498 d->code, d->name);
19500 def_builtin (d->name, type, d->code);
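/* Illustrative model (a hypothetical helper, not used by this file) of
   the mask gating in all four loops above: a builtin whose descriptor
   requires feature mask M is registered only when every bit of M is
   present in the enabled-feature mask.

     static inline int
     builtin_enabled_p (unsigned long long required,
                        unsigned long long enabled)
     {
       return (required & enabled) == required;
     }

   This is the `(mask & builtin_mask) != mask' skip test, inverted.  */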
19504 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19505 static void
19506 init_float128_ibm (machine_mode mode)
19508 if (!TARGET_XL_COMPAT)
19510 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19511 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19512 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19513 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19515 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19517 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19518 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19519 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19520 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19521 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19522 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19523 set_optab_libfunc (le_optab, mode, "__gcc_qle");
19525 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19526 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19527 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19528 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19529 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19530 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19531 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19532 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19535 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19536 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19538 else
19540 set_optab_libfunc (add_optab, mode, "_xlqadd");
19541 set_optab_libfunc (sub_optab, mode, "_xlqsub");
19542 set_optab_libfunc (smul_optab, mode, "_xlqmul");
19543 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
19546 /* Add various conversions for IFmode to use the traditional TFmode
19547 names. */
19548 if (mode == IFmode)
19550 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19551 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19552 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19553 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19554 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19555 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19557 if (TARGET_POWERPC64)
19559 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19560 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19561 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19562 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
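/* Usage sketch (illustrative, not code from this file): with the
   default IBM extended-double ABI, i.e. without -mxl-compat, a 128-bit
   long double addition expands to a call to the __gcc_qadd routine
   registered above:

     long double qadd (long double a, long double b)
     {
       return a + b;   /* becomes a call to __gcc_qadd  */
     }
*/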
19567 /* Set up IEEE 128-bit floating point routines. Use different names if the
19568 arguments can be passed in a vector register. The historical PowerPC
19569 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19570 continue to use that if we aren't using vector registers to pass IEEE
19571 128-bit floating point. */
19573 static void
19574 init_float128_ieee (machine_mode mode)
19576 if (FLOAT128_VECTOR_P (mode))
19578 set_optab_libfunc (add_optab, mode, "__addkf3");
19579 set_optab_libfunc (sub_optab, mode, "__subkf3");
19580 set_optab_libfunc (neg_optab, mode, "__negkf2");
19581 set_optab_libfunc (smul_optab, mode, "__mulkf3");
19582 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19583 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19584 set_optab_libfunc (abs_optab, mode, "__abstkf2");
19586 set_optab_libfunc (eq_optab, mode, "__eqkf2");
19587 set_optab_libfunc (ne_optab, mode, "__nekf2");
19588 set_optab_libfunc (gt_optab, mode, "__gtkf2");
19589 set_optab_libfunc (ge_optab, mode, "__gekf2");
19590 set_optab_libfunc (lt_optab, mode, "__ltkf2");
19591 set_optab_libfunc (le_optab, mode, "__lekf2");
19592 set_optab_libfunc (unord_optab, mode, "__unordkf2");
19594 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19595 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19596 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19597 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19599 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19600 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19601 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19603 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19604 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19605 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19607 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19608 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19609 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19610 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19611 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19612 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19614 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19615 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19616 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19617 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19619 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19620 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19621 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19622 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19624 if (TARGET_POWERPC64)
19626 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19627 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19628 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19629 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19633 else
19635 set_optab_libfunc (add_optab, mode, "_q_add");
19636 set_optab_libfunc (sub_optab, mode, "_q_sub");
19637 set_optab_libfunc (neg_optab, mode, "_q_neg");
19638 set_optab_libfunc (smul_optab, mode, "_q_mul");
19639 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19640 if (TARGET_PPC_GPOPT)
19641 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19643 set_optab_libfunc (eq_optab, mode, "_q_feq");
19644 set_optab_libfunc (ne_optab, mode, "_q_fne");
19645 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19646 set_optab_libfunc (ge_optab, mode, "_q_fge");
19647 set_optab_libfunc (lt_optab, mode, "_q_flt");
19648 set_optab_libfunc (le_optab, mode, "_q_fle");
19650 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19651 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19652 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19653 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19654 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19655 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19656 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19657 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19661 static void
19662 rs6000_init_libfuncs (void)
19664 /* __float128 support. */
19665 if (TARGET_FLOAT128_TYPE)
19667 init_float128_ibm (IFmode);
19668 init_float128_ieee (KFmode);
19671 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19672 if (TARGET_LONG_DOUBLE_128)
19674 if (!TARGET_IEEEQUAD)
19675 init_float128_ibm (TFmode);
19677 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19678 else
19679 init_float128_ieee (TFmode);
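/* Usage sketch (illustrative, under the assumption that KFmode is the
   __float128 type on a TARGET_FLOAT128_TYPE configuration): the optab
   entries installed by init_float128_ieee make IEEE 128-bit arithmetic
   expand to libcalls, e.g.:

     __float128 kadd (__float128 a, __float128 b)
     {
       return a + b;   /* becomes a call to __addkf3  */
     }
*/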
19684 /* Expand a block clear operation, and return 1 if successful. Return 0
19685 if we should let the compiler generate normal code.
19687 operands[0] is the destination
19688 operands[1] is the length
19689 operands[3] is the alignment */
19692 expand_block_clear (rtx operands[])
19694 rtx orig_dest = operands[0];
19695 rtx bytes_rtx = operands[1];
19696 rtx align_rtx = operands[3];
19697 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19698 HOST_WIDE_INT align;
19699 HOST_WIDE_INT bytes;
19700 int offset;
19701 int clear_bytes;
19702 int clear_step;
19704 /* If this is not a fixed size clear, just call memset. */
19705 if (! constp)
19706 return 0;
19708 /* This must be a fixed size alignment */
19709 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19710 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19712 /* Anything to clear? */
19713 bytes = INTVAL (bytes_rtx);
19714 if (bytes <= 0)
19715 return 1;
19717 /* Use the builtin memset after a point, to avoid huge code bloat.
19718 When optimize_size, avoid any significant code bloat; calling
19719 memset is about 4 instructions, so allow for one instruction to
19720 load zero and three to do clearing. */
19721 if (TARGET_ALTIVEC && align >= 128)
19722 clear_step = 16;
19723 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19724 clear_step = 8;
19725 else if (TARGET_SPE && align >= 64)
19726 clear_step = 8;
19727 else
19728 clear_step = 4;
19730 if (optimize_size && bytes > 3 * clear_step)
19731 return 0;
19732 if (! optimize_size && bytes > 8 * clear_step)
19733 return 0;
19735 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19737 machine_mode mode = BLKmode;
19738 rtx dest;
19740 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19742 clear_bytes = 16;
19743 mode = V4SImode;
19745 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19747 clear_bytes = 8;
19748 mode = V2SImode;
19750 else if (bytes >= 8 && TARGET_POWERPC64
19751 && (align >= 64 || !STRICT_ALIGNMENT))
19753 clear_bytes = 8;
19754 mode = DImode;
19755 if (offset == 0 && align < 64)
19757 rtx addr;
19759 /* If the address form is reg+offset with offset not a
19760 multiple of four, reload into reg indirect form here
19761 rather than waiting for reload. This way we get one
19762 reload, not one per store. */
19763 addr = XEXP (orig_dest, 0);
19764 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19765 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19766 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19768 addr = copy_addr_to_reg (addr);
19769 orig_dest = replace_equiv_address (orig_dest, addr);
19773 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19774 { /* clear 4 bytes */
19775 clear_bytes = 4;
19776 mode = SImode;
19778 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19779 { /* clear 2 bytes */
19780 clear_bytes = 2;
19781 mode = HImode;
19783 else /* clear 1 byte at a time */
19785 clear_bytes = 1;
19786 mode = QImode;
19789 dest = adjust_address (orig_dest, mode, offset);
19791 emit_move_insn (dest, CONST0_RTX (mode));
19794 return 1;
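/* Usage sketch for the expansion above (assuming -maltivec and a
   destination known to be 16-byte aligned): a fixed 32-byte clear
   stays under both size limits and is emitted inline as two V4SImode
   stores of zero instead of a memset call.

     void zero32 (char *p)
     {
       __builtin_memset (p, 0, 32);   /* two 16-byte vector stores  */
     }
*/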
19797 /* Emit a potentially record-form instruction, setting DST from SRC.
19798 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19799 signed comparison of DST with zero. If DOT is 1, the generated RTL
19800 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19801 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19802 a separate COMPARE. */
19804 static void
19805 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19807 if (dot == 0)
19809 emit_move_insn (dst, src);
19810 return;
19813 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19815 emit_move_insn (dst, src);
19816 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19817 return;
19820 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19821 if (dot == 1)
19823 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19824 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19826 else
19828 rtx set = gen_rtx_SET (dst, src);
19829 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
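/* For reference, the two record-form shapes emitted above:

     (parallel [(set CCREG (compare SRC (const_int 0)))
                (clobber DST)])                 ;; dot == 1, DST unused
     (parallel [(set CCREG (compare SRC (const_int 0)))
                (set DST SRC)])                 ;; dot == 2, DST live
*/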
19833 /* Figure out the correct instructions to generate to load data for
19834 block compare. MODE is used for the read from memory, and
19835 data is zero extended if REG is wider than MODE. If LE code
19836 is being generated, bswap loads are used.
19838 REG is the destination register to move the data into.
19839 MEM is the memory block being read.
19840 MODE is the mode of memory to use for the read. */
19841 static void
19842 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19844 switch (GET_MODE (reg))
19846 case E_DImode:
19847 switch (mode)
19849 case E_QImode:
19850 emit_insn (gen_zero_extendqidi2 (reg, mem));
19851 break;
19852 case E_HImode:
19854 rtx src = mem;
19855 if (!BYTES_BIG_ENDIAN)
19857 src = gen_reg_rtx (HImode);
19858 emit_insn (gen_bswaphi2 (src, mem));
19860 emit_insn (gen_zero_extendhidi2 (reg, src));
19861 break;
19863 case E_SImode:
19865 rtx src = mem;
19866 if (!BYTES_BIG_ENDIAN)
19868 src = gen_reg_rtx (SImode);
19869 emit_insn (gen_bswapsi2 (src, mem));
19871 emit_insn (gen_zero_extendsidi2 (reg, src));
19873 break;
19874 case E_DImode:
19875 if (!BYTES_BIG_ENDIAN)
19876 emit_insn (gen_bswapdi2 (reg, mem));
19877 else
19878 emit_insn (gen_movdi (reg, mem));
19879 break;
19880 default:
19881 gcc_unreachable ();
19883 break;
19885 case E_SImode:
19886 switch (mode)
19888 case E_QImode:
19889 emit_insn (gen_zero_extendqisi2 (reg, mem));
19890 break;
19891 case E_HImode:
19893 rtx src = mem;
19894 if (!BYTES_BIG_ENDIAN)
19896 src = gen_reg_rtx (HImode);
19897 emit_insn (gen_bswaphi2 (src, mem));
19899 emit_insn (gen_zero_extendhisi2 (reg, src));
19900 break;
19902 case E_SImode:
19903 if (!BYTES_BIG_ENDIAN)
19904 emit_insn (gen_bswapsi2 (reg, mem));
19905 else
19906 emit_insn (gen_movsi (reg, mem));
19907 break;
19908 case E_DImode:
19909 /* DImode is larger than the destination reg so is not expected. */
19910 gcc_unreachable ();
19911 break;
19912 default:
19913 gcc_unreachable ();
19915 break;
19916 default:
19917 gcc_unreachable ();
19918 break;
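/* Example of the dispatch above: on a little-endian target with REG in
   DImode and MODE == SImode, the four bytes are loaded through
   gen_bswapsi2 into a fresh SImode register and then zero-extended, so
   the comparison code always sees the memory bytes in big-endian order
   within the register.  */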
19922 /* Select the mode to be used for reading the next chunk of bytes
19923 in the compare.
19925 OFFSET is the current read offset from the beginning of the block.
19926 BYTES is the number of bytes remaining to be read.
19927 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19928 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19929 the largest allowable mode. */
19930 static machine_mode
19931 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19932 unsigned HOST_WIDE_INT bytes,
19933 unsigned HOST_WIDE_INT align, bool word_mode_ok)
19935 /* First see if we can do a whole load unit
19936 as that will be more efficient than a larger load + shift. */
19938 /* If big, use biggest chunk.
19939 If exactly chunk size, use that size.
19940 If remainder can be done in one piece with shifting, do that.
19941 Do largest chunk possible without violating alignment rules. */
19943 /* The most we can read without potential page crossing. */
19944 unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19946 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19947 return word_mode;
19948 else if (bytes == GET_MODE_SIZE (SImode))
19949 return SImode;
19950 else if (bytes == GET_MODE_SIZE (HImode))
19951 return HImode;
19952 else if (bytes == GET_MODE_SIZE (QImode))
19953 return QImode;
19954 else if (bytes < GET_MODE_SIZE (SImode)
19955 && offset >= GET_MODE_SIZE (SImode) - bytes)
19956 /* This matches the case where we have SImode and 3 bytes
19957 and offset >= 1, and permits us to move back one byte and overlap
19958 with the previous read, thus avoiding having to shift
19959 unwanted bytes off the input. */
19960 return SImode;
19961 else if (word_mode_ok && bytes < UNITS_PER_WORD
19962 && offset >= UNITS_PER_WORD - bytes)
19963 /* Similarly, if we can use DImode it will get matched here and
19964 can do an overlapping read that ends at the end of the block. */
19965 return word_mode;
19966 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19967 /* It is safe to do all remaining in one load of largest size,
19968 possibly with a shift to get rid of unwanted bytes. */
19969 return word_mode;
19970 else if (maxread >= GET_MODE_SIZE (SImode))
19971 /* It is safe to do all remaining in one SImode load,
19972 possibly with a shift to get rid of unwanted bytes. */
19973 return SImode;
19974 else if (bytes > GET_MODE_SIZE (SImode))
19975 return SImode;
19976 else if (bytes > GET_MODE_SIZE (HImode))
19977 return HImode;
19979 /* Final fallback is to do one byte at a time. */
19980 return QImode;
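/* Worked example of the selection above, assuming word_mode == DImode
   and WORD_MODE_OK: an 18-byte block is read as 8 (DImode), 8 (DImode),
   then 2 (HImode), matching the generated code quoted in
   expand_block_compare below.  With 3 bytes left at OFFSET >= 1, the
   SImode overlap case applies instead and the read backs up one byte
   rather than shifting.  */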
19983 /* Compute the alignment of pointer+OFFSET where the original alignment
19984 of pointer was BASE_ALIGN. */
19985 static unsigned HOST_WIDE_INT
19986 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19987 unsigned HOST_WIDE_INT offset)
19989 if (offset == 0)
19990 return base_align;
19991 return min (base_align, offset & -offset);
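/* The `offset & -offset' above isolates the lowest set bit of OFFSET
   in two's complement, i.e. the largest power of two dividing it.
   Worked examples with BASE_ALIGN == 8:
     offset 0  -> 8  (base alignment preserved)
     offset 4  -> min (8, 4) == 4
     offset 12 -> min (8, 12 & -12) == min (8, 4) == 4  */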
19994 /* Expand a block compare operation, and return true if successful.
19995 Return false if we should let the compiler generate normal code,
19996 probably a memcmp call.
19998 OPERANDS[0] is the target (result).
19999 OPERANDS[1] is the first source.
20000 OPERANDS[2] is the second source.
20001 OPERANDS[3] is the length.
20002 OPERANDS[4] is the alignment. */
20003 bool
20004 expand_block_compare (rtx operands[])
20006 rtx target = operands[0];
20007 rtx orig_src1 = operands[1];
20008 rtx orig_src2 = operands[2];
20009 rtx bytes_rtx = operands[3];
20010 rtx align_rtx = operands[4];
20011 HOST_WIDE_INT cmp_bytes = 0;
20012 rtx src1 = orig_src1;
20013 rtx src2 = orig_src2;
20015 /* This case is complicated to handle because the subtract
20016 with carry instructions do not generate the 64-bit
20017 carry and so we must emit code to calculate it ourselves.
20018 We choose not to implement this yet. */
20019 if (TARGET_32BIT && TARGET_POWERPC64)
20020 return false;
20022 /* If this is not a fixed size compare, just call memcmp. */
20023 if (!CONST_INT_P (bytes_rtx))
20024 return false;
20026 /* This must be a fixed size alignment. */
20027 if (!CONST_INT_P (align_rtx))
20028 return false;
20030 unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
20032 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20033 if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
20034 || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
20035 return false;
20037 gcc_assert (GET_MODE (target) == SImode);
20039 /* Anything to compare? */
20040 unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
20041 if (bytes == 0)
20042 return true;
20044 /* The code generated for p7 and older is not faster than glibc
20045 memcmp if alignment is small and length is not short, so bail
20046 out to avoid those conditions. */
20047 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
20048 && ((base_align == 1 && bytes > 16)
20049 || (base_align == 2 && bytes > 32)))
20050 return false;
20052 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20053 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20054 /* P7/P8 code uses cond for subfc., but P9 uses
20055 it for cmpld, which needs CCUNSmode. */
20056 rtx cond;
20057 if (TARGET_P9_MISC)
20058 cond = gen_reg_rtx (CCUNSmode);
20059 else
20060 cond = gen_reg_rtx (CCmode);
20062 /* If we have an LE target without ldbrx and word_mode is DImode,
20063 then we must avoid using word_mode. */
20064 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20065 && word_mode == DImode);
20067 /* Strategy phase. How many ops will this take and should we expand it? */
20069 unsigned HOST_WIDE_INT offset = 0;
20070 machine_mode load_mode =
20071 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20072 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20074 /* We don't want to generate too much code. */
20075 unsigned HOST_WIDE_INT max_bytes =
20076 load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
20077 if (!IN_RANGE (bytes, 1, max_bytes))
20078 return false;
20080 bool generate_6432_conversion = false;
20081 rtx convert_label = NULL;
20082 rtx final_label = NULL;
20084 /* Example of generated code for an 18-byte block with 1-byte alignment.
20085 Compiled with -fno-reorder-blocks for clarity.
20086 ldbrx 10,31,8
20087 ldbrx 9,7,8
20088 subfc. 9,9,10
20089 bne 0,.L6487
20090 addi 9,12,8
20091 addi 5,11,8
20092 ldbrx 10,0,9
20093 ldbrx 9,0,5
20094 subfc. 9,9,10
20095 bne 0,.L6487
20096 addi 9,12,16
20097 lhbrx 10,0,9
20098 addi 9,11,16
20099 lhbrx 9,0,9
20100 subf 9,9,10
20101 b .L6488
20102 .p2align 4,,15
20103 .L6487: #convert_label
20104 popcntd 9,9
20105 subfe 10,10,10
20106 or 9,9,10
20107 .L6488: #final_label
20108 extsw 10,9
20110 We start off with DImode for two blocks that jump to the DI->SI conversion
20111 if the difference is found there, then a final block of HImode that skips
20112 the DI->SI conversion. */
20114 while (bytes > 0)
20116 unsigned int align = compute_current_alignment (base_align, offset);
20117 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20118 load_mode = select_block_compare_mode (offset, bytes, align,
20119 word_mode_ok);
20120 else
20121 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
20122 load_mode_size = GET_MODE_SIZE (load_mode);
20123 if (bytes >= load_mode_size)
20124 cmp_bytes = load_mode_size;
20125 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20127 /* Move this load back so it doesn't go past the end.
20128 P8/P9 can do this efficiently. */
20129 unsigned int extra_bytes = load_mode_size - bytes;
20130 cmp_bytes = bytes;
20131 if (extra_bytes < offset)
20133 offset -= extra_bytes;
20134 cmp_bytes = load_mode_size;
20135 bytes = cmp_bytes;
20138 else
20139 /* P7 and earlier can't do the overlapping load trick fast,
20140 so this forces a non-overlapping load and a shift to get
20141 rid of the extra bytes. */
20142 cmp_bytes = bytes;
20144 src1 = adjust_address (orig_src1, load_mode, offset);
20145 src2 = adjust_address (orig_src2, load_mode, offset);
20147 if (!REG_P (XEXP (src1, 0)))
20149 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20150 src1 = replace_equiv_address (src1, src1_reg);
20152 set_mem_size (src1, cmp_bytes);
20154 if (!REG_P (XEXP (src2, 0)))
20156 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20157 src2 = replace_equiv_address (src2, src2_reg);
20159 set_mem_size (src2, cmp_bytes);
20161 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20162 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20164 if (cmp_bytes < load_mode_size)
20166 /* Shift unneeded bytes off. */
20167 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
20168 if (word_mode == DImode)
20170 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
20171 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
20173 else
20175 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20176 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20180 int remain = bytes - cmp_bytes;
20181 if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
20183 /* Target is larger than load size so we don't need to
20184 reduce result size. */
20186 /* We previously did a block that needed 64->32 conversion but
20187 the current block does not, so a label is needed to jump
20188 to the end. */
20189 if (generate_6432_conversion && !final_label)
20190 final_label = gen_label_rtx ();
20192 if (remain > 0)
20194 /* This is not the last block; branch to the end if the result
20195 of this subtract is not zero. */
20196 if (!final_label)
20197 final_label = gen_label_rtx ();
20198 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20199 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20200 rtx cr = gen_reg_rtx (CCmode);
20201 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
20202 emit_insn (gen_movsi (target,
20203 gen_lowpart (SImode, tmp_reg_src2)));
20204 rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
20205 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20206 fin_ref, pc_rtx);
20207 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20208 JUMP_LABEL (j) = final_label;
20209 LABEL_NUSES (final_label) += 1;
20211 else
20213 if (word_mode == DImode)
20215 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
20216 tmp_reg_src2));
20217 emit_insn (gen_movsi (target,
20218 gen_lowpart (SImode, tmp_reg_src2)));
20220 else
20221 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
20223 if (final_label)
20225 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20226 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20227 JUMP_LABEL (j) = final_label;
20228 LABEL_NUSES (final_label) += 1;
20229 emit_barrier ();
20233 else
20235 /* Do we need a 64->32 conversion block? We need the 64->32
20236 conversion even if target size == load_mode size because
20237 the subtract generates one extra bit. */
20238 generate_6432_conversion = true;
20240 if (remain > 0)
20242 if (!convert_label)
20243 convert_label = gen_label_rtx ();
20245 /* Compare to zero and branch to convert_label if not zero. */
20246 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
20247 if (TARGET_P9_MISC)
20249 /* Generate a compare, and convert with a setb later. */
20250 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20251 tmp_reg_src2);
20252 emit_insn (gen_rtx_SET (cond, cmp));
20254 else
20255 /* Generate a subfc. and use the longer
20256 sequence for conversion. */
20257 if (TARGET_64BIT)
20258 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20259 tmp_reg_src1, cond));
20260 else
20261 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20262 tmp_reg_src1, cond));
20263 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20264 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20265 cvt_ref, pc_rtx);
20266 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20267 JUMP_LABEL (j) = convert_label;
20268 LABEL_NUSES (convert_label) += 1;
20270 else
20272 /* Just do the subtract/compare. Since this is the last block,
20273 the convert code will be generated immediately following. */
20274 if (TARGET_P9_MISC)
20276 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20277 tmp_reg_src2);
20278 emit_insn (gen_rtx_SET (cond, cmp));
20280 else
20281 if (TARGET_64BIT)
20282 emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
20283 tmp_reg_src1));
20284 else
20285 emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
20286 tmp_reg_src1));
20290 offset += cmp_bytes;
20291 bytes -= cmp_bytes;
20294 if (generate_6432_conversion)
20296 if (convert_label)
20297 emit_label (convert_label);
20299 /* We need to produce a DI result from the subtract, then convert to target SI
20300 while maintaining <0 / ==0 / >0 properties. This sequence works:
20301 subfc L,A,B
20302 subfe H,H,H
20303 popcntd L,L
20304 rldimi L,H,6,0
20306 This is an alternate one Segher cooked up if somebody
20307 wants to expand this for something that doesn't have popcntd:
20308 subfc L,a,b
20309 subfe H,x,x
20310 addic t,L,-1
20311 subfe v,t,L
20312 or z,v,H
20314 And finally, p9 can just do this:
20315 cmpld A,B
20316 setb r */
20318 if (TARGET_P9_MISC)
20320 emit_insn (gen_setb_unsigned (target, cond));
20322 else
20324 if (TARGET_64BIT)
20326 rtx tmp_reg_ca = gen_reg_rtx (DImode);
20327 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
20328 emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
20329 emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
20330 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
20332 else
20334 rtx tmp_reg_ca = gen_reg_rtx (SImode);
20335 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
20336 emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
20337 emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
20342 if (final_label)
20343 emit_label (final_label);
20345 gcc_assert (bytes == 0);
20346 return true;
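/* Why the subfc/subfe/popcntd sequence above preserves the
   <0 / ==0 / >0 contract: the carry from the subtract is clear exactly
   when it borrows, i.e. when the value being subtracted is
   unsigned-greater than the other operand, so subfe H,H,H yields
   H == -1 on borrow and H == 0 otherwise.  popcntd maps any nonzero
   difference to a small positive count, and merging H back in (via the
   or or rldimi) forces the result negative on borrow -- negative,
   zero, or positive exactly as memcmp requires.  */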
20349 /* Generate alignment check and branch code to set up for
20350 strncmp when we don't have DI alignment.
20351 STRNCMP_LABEL is the label to branch if there is a page crossing.
20352 SRC is the string pointer to be examined.
20353 BYTES is the max number of bytes to compare. */
20354 static void
20355 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
20357 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
20358 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
20359 if (GET_MODE (src_check) == SImode)
20360 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
20361 else
20362 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
20363 rtx cond = gen_reg_rtx (CCmode);
20364 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
20365 GEN_INT (4096 - bytes)));
20367 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
20369 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20370 pc_rtx, lab_ref);
20371 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20372 JUMP_LABEL (j) = strncmp_label;
20373 LABEL_NUSES (strncmp_label) += 1;
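/* Numeric sketch of the check above: with BYTES == 16 the emitted test
   branches to STRNCMP_LABEL when (addr & 0xfff) >= 4096 - 16, i.e.
   exactly when a 16-byte read starting at ADDR could cross a 4 KiB
   page boundary -- the same guard as the power8 glibc strncmp sequence
   quoted in expand_strn_compare below.  */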
20376 /* Expand a string compare operation with length, and return
20377 true if successful. Return false if we should let the
20378 compiler generate normal code, probably a strncmp call.
20380 OPERANDS[0] is the target (result).
20381 OPERANDS[1] is the first source.
20382 OPERANDS[2] is the second source.
20383 If NO_LENGTH is zero, then:
20384 OPERANDS[3] is the length.
20385 OPERANDS[4] is the alignment in bytes.
20386 If NO_LENGTH is nonzero, then:
20387 OPERANDS[3] is the alignment in bytes. */
20388 bool
20389 expand_strn_compare (rtx operands[], int no_length)
20391 rtx target = operands[0];
20392 rtx orig_src1 = operands[1];
20393 rtx orig_src2 = operands[2];
20394 rtx bytes_rtx, align_rtx;
20395 if (no_length)
20397 bytes_rtx = NULL;
20398 align_rtx = operands[3];
20400 else
20402 bytes_rtx = operands[3];
20403 align_rtx = operands[4];
20405 unsigned HOST_WIDE_INT cmp_bytes = 0;
20406 rtx src1 = orig_src1;
20407 rtx src2 = orig_src2;
20409 /* If we have a length, it must be constant. This simplifies things
20410 a bit as we don't have to generate code to check if we've exceeded
20411 the length. This could later be extended to handle a variable length. */
20412 if (!no_length && !CONST_INT_P (bytes_rtx))
20413 return false;
20415 /* This must be a fixed size alignment. */
20416 if (!CONST_INT_P (align_rtx))
20417 return false;
20419 unsigned int base_align = UINTVAL (align_rtx);
20420 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20421 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20423 /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */
20424 if (rs6000_slow_unaligned_access (word_mode, align1)
20425 || rs6000_slow_unaligned_access (word_mode, align2))
20426 return false;
20428 gcc_assert (GET_MODE (target) == SImode);
20430 /* If we have an LE target without ldbrx and word_mode is DImode,
20431 then we must avoid using word_mode. */
20432 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20433 && word_mode == DImode);
20435 unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20437 unsigned HOST_WIDE_INT offset = 0;
20438 unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
20439 unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
20440 if (no_length)
20441 /* Use this as a stand-in to determine the mode to use. */
20442 bytes = rs6000_string_compare_inline_limit * word_mode_size;
20443 else
20444 bytes = UINTVAL (bytes_rtx);
20446 machine_mode load_mode =
20447 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20448 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20449 compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20451 /* If we have equality at the end of the last compare and we have not
20452 found the end of the string, we need to call strcmp/strncmp to
20453 compare the remainder. */
20454 bool equality_compare_rest = false;
20456 if (no_length)
20458 bytes = compare_length;
20459 equality_compare_rest = true;
20461 else
20463 if (bytes <= compare_length)
20464 compare_length = bytes;
20465 else
20466 equality_compare_rest = true;
20469 rtx result_reg = gen_reg_rtx (word_mode);
20470 rtx final_move_label = gen_label_rtx ();
20471 rtx final_label = gen_label_rtx ();
20472 rtx begin_compare_label = NULL;
20474 if (base_align < 8)
20476 /* Generate code that checks distance to 4k boundary for this case. */
20477 begin_compare_label = gen_label_rtx ();
20478 rtx strncmp_label = gen_label_rtx ();
20479 rtx jmp;
20481 /* Strncmp for power8 in glibc does this:
20482 rldicl r8,r3,0,52
20483 cmpldi cr7,r8,4096-16
20484 bgt cr7,L(pagecross) */
20486 /* Make sure that the length we use for the alignment test and
20487 the subsequent code generation are in agreement so we do not
20488 go past the length we tested for a 4k boundary crossing. */
20489 unsigned HOST_WIDE_INT align_test = compare_length;
20490 if (align_test < 8)
20492 align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20493 base_align = align_test;
20495 else
20497 align_test = ROUND_UP (align_test, 8);
20498 base_align = 8;
20501 if (align1 < 8)
20502 expand_strncmp_align_check (strncmp_label, src1, align_test);
20503 if (align2 < 8)
20504 expand_strncmp_align_check (strncmp_label, src2, align_test);
20506 /* Now generate the following sequence:
20507 - branch to begin_compare
20508 - strncmp_label
20509 - call to strncmp
20510 - branch to final_label
20511 - begin_compare_label */
20513 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20514 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20515 JUMP_LABEL (jmp) = begin_compare_label;
20516 LABEL_NUSES (begin_compare_label) += 1;
20517 emit_barrier ();
20519 emit_label (strncmp_label);
20521 if (!REG_P (XEXP (src1, 0)))
20523 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20524 src1 = replace_equiv_address (src1, src1_reg);
20527 if (!REG_P (XEXP (src2, 0)))
20529 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20530 src2 = replace_equiv_address (src2, src2_reg);
20533 if (no_length)
20535 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20536 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20537 target, LCT_NORMAL, GET_MODE (target),
20538 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20539 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20541 else
20543 /* -m32 -mpowerpc64 results in word_mode being DImode even
20544 though the target is otherwise 32-bit. The length arg to strncmp
20545 is a size_t which will be the same size as pointers. */
20546 rtx len_rtx;
20547 if (TARGET_64BIT)
20548 len_rtx = gen_reg_rtx (DImode);
20549 else
20550 len_rtx = gen_reg_rtx (SImode);
20552 emit_move_insn (len_rtx, bytes_rtx);
20554 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20555 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20556 target, LCT_NORMAL, GET_MODE (target),
20557 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20558 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20559 len_rtx, GET_MODE (len_rtx));
20562 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20563 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20564 JUMP_LABEL (jmp) = final_label;
20565 LABEL_NUSES (final_label) += 1;
20566 emit_barrier ();
20567 emit_label (begin_compare_label);
20570 rtx cleanup_label = NULL;
20571 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20572 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20574 /* Generate sequence of ld/ldbrx, cmpb to compare out
20575 to the length specified. */
20576 unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20577 while (bytes_to_compare > 0)
20579 /* Compare sequence:
20580 check each 8B with: ld/ld cmpd bne
20581 If equal, use rldicr/cmpb to check for zero byte.
20582 cleanup code at end:
20583 cmpb get byte that differs
20584 cmpb look for zero byte
20585 orc combine
20586 cntlzd get bit of first zero/diff byte
20587 subfic convert for rldcl use
20588 rldcl extract diff/zero byte
20589 subf subtract for final result
20591 The last compare can branch around the cleanup code if the
20592 result is zero because the strings are exactly equal. */
20593 unsigned int align = compute_current_alignment (base_align, offset);
20594 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20595 load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20596 word_mode_ok);
20597 else
20598 load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20599 word_mode_ok);
20600 load_mode_size = GET_MODE_SIZE (load_mode);
20601 if (bytes_to_compare >= load_mode_size)
20602 cmp_bytes = load_mode_size;
20603 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20605 /* Move this load back so it doesn't go past the end.
20606 P8/P9 can do this efficiently. */
20607 unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20608 cmp_bytes = bytes_to_compare;
20609 if (extra_bytes < offset)
20611 offset -= extra_bytes;
20612 cmp_bytes = load_mode_size;
20613 bytes_to_compare = cmp_bytes;
20616 else
20617 /* P7 and earlier can't do the overlapping load trick fast,
20618 so this forces a non-overlapping load and a shift to get
20619 rid of the extra bytes. */
20620 cmp_bytes = bytes_to_compare;
20622 src1 = adjust_address (orig_src1, load_mode, offset);
20623 src2 = adjust_address (orig_src2, load_mode, offset);
20625 if (!REG_P (XEXP (src1, 0)))
20627 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20628 src1 = replace_equiv_address (src1, src1_reg);
20630 set_mem_size (src1, cmp_bytes);
20632 if (!REG_P (XEXP (src2, 0)))
20634 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20635 src2 = replace_equiv_address (src2, src2_reg);
20637 set_mem_size (src2, cmp_bytes);
20639 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20640 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20642 /* We must always left-align the data we read, and
20643 clear any bytes to the right that are beyond the string.
20644 Otherwise the cmpb sequence won't produce the correct
20645 results. The beginning of the compare will be done
20646 with word_mode loads, so it will not need any extra shifts or
20647 masking. */
20649 if (load_mode_size < word_mode_size)
20651 /* Rotate left first. */
20652 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20653 if (word_mode == DImode)
20655 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20656 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20658 else
20660 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20661 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20665 if (cmp_bytes < word_mode_size)
20667 /* Now clear right. This plus the rotate can be
20668 turned into a rldicr instruction. */
20669 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20670 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20671 if (word_mode == DImode)
20673 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20674 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20676 else
20678 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20679 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20683 /* Cases to handle. A and B are chunks of the two strings.
20684 1: Not end of comparison:
20685 A != B: branch to cleanup code to compute result.
20686 A == B: check for 0 byte, next block if not found.
20687 2: End of the inline comparison:
20688 A != B: branch to cleanup code to compute result.
20689 A == B: check for 0 byte, call strcmp/strncmp
20690 3: Compared the requested N bytes:
20691 A == B: branch to result 0.
20692 A != B: cleanup code to compute result. */
20694 unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20696 rtx dst_label;
20697 if (remain > 0 || equality_compare_rest)
20699 /* Branch to cleanup code, otherwise fall through to do
20700 more compares. */
20701 if (!cleanup_label)
20702 cleanup_label = gen_label_rtx ();
20703 dst_label = cleanup_label;
20705 else
20706 /* Branch to end and produce result of 0. */
20707 dst_label = final_move_label;
20709 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20710 rtx cond = gen_reg_rtx (CCmode);
20712 /* Always produce the 0 result; it is needed if
20713 cmpb finds a 0 byte in this chunk. */
20714 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20715 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20717 rtx cmp_rtx;
20718 if (remain == 0 && !equality_compare_rest)
20719 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20720 else
20721 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20723 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20724 lab_ref, pc_rtx);
20725 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20726 JUMP_LABEL (j) = dst_label;
20727 LABEL_NUSES (dst_label) += 1;
20729 if (remain > 0 || equality_compare_rest)
20731 /* Generate a cmpb to test for a 0 byte and branch
20732 to final result if found. */
20733 rtx cmpb_zero = gen_reg_rtx (word_mode);
20734 rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20735 rtx condz = gen_reg_rtx (CCmode);
20736 rtx zero_reg = gen_reg_rtx (word_mode);
20737 if (word_mode == SImode)
20739 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20740 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20741 if (cmp_bytes < word_mode_size)
20743 /* Don't look at zero bytes past the end. */
20744 HOST_WIDE_INT mb =
20745 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20746 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20747 emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20750 else
20752 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20753 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20754 if (cmp_bytes < word_mode_size)
20756 /* Don't look at zero bytes past the end. */
20757 HOST_WIDE_INT mb =
20758 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20759 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20760 emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20764 emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20765 rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20766 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20767 lab_ref_fin, pc_rtx);
20768 rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20769 JUMP_LABEL (j2) = final_move_label;
20770 LABEL_NUSES (final_move_label) += 1;
20774 offset += cmp_bytes;
20775 bytes_to_compare -= cmp_bytes;
20778 if (equality_compare_rest)
20780 /* Update pointers past what has been compared already. */
20781 src1 = adjust_address (orig_src1, load_mode, offset);
20782 src2 = adjust_address (orig_src2, load_mode, offset);
20784 if (!REG_P (XEXP (src1, 0)))
20786 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20787 src1 = replace_equiv_address (src1, src1_reg);
20789 set_mem_size (src1, cmp_bytes);
20791 if (!REG_P (XEXP (src2, 0)))
20793 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20794 src2 = replace_equiv_address (src2, src2_reg);
20796 set_mem_size (src2, cmp_bytes);
20798 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20799 if (no_length)
20801 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20802 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20803 target, LCT_NORMAL, GET_MODE (target),
20804 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20805 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20807 else
20809 rtx len_rtx;
20810 if (TARGET_64BIT)
20811 len_rtx = gen_reg_rtx (DImode);
20812 else
20813 len_rtx = gen_reg_rtx (SImode);
20815 emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20816 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20817 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20818 target, LCT_NORMAL, GET_MODE (target),
20819 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20820 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20821 len_rtx, GET_MODE (len_rtx));
20824 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20825 rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20826 JUMP_LABEL (jmp) = final_label;
20827 LABEL_NUSES (final_label) += 1;
20828 emit_barrier ();
20831 if (cleanup_label)
20832 emit_label (cleanup_label);
20834 /* Generate the final sequence that identifies the differing
20835 byte and generates the final result, taking into account
20836 zero bytes:
20838 cmpb cmpb_result1, src1, src2
20839 cmpb cmpb_result2, src1, zero
20840 orc cmpb_result1, cmpb_result1, cmpb_result2
20841 cntlzd get bit of first zero/diff byte
20842 addi convert for rldcl use
20843 rldcl extract diff/zero byte
20844 subf subtract for final result
20847 rtx cmpb_diff = gen_reg_rtx (word_mode);
20848 rtx cmpb_zero = gen_reg_rtx (word_mode);
20849 rtx rot_amt = gen_reg_rtx (word_mode);
20850 rtx zero_reg = gen_reg_rtx (word_mode);
20852 rtx rot1_1 = gen_reg_rtx (word_mode);
20853 rtx rot1_2 = gen_reg_rtx (word_mode);
20854 rtx rot2_1 = gen_reg_rtx (word_mode);
20855 rtx rot2_2 = gen_reg_rtx (word_mode);
20857 if (word_mode == SImode)
20859 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20860 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20861 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20862 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20863 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20864 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20865 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20866 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20867 gen_lowpart (SImode, rot_amt)));
20868 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20869 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20870 gen_lowpart (SImode, rot_amt)));
20871 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20872 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20874 else
20876 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20877 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20878 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20879 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20880 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20881 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20882 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20883 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20884 gen_lowpart (SImode, rot_amt)));
20885 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20886 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20887 gen_lowpart (SImode, rot_amt)));
20888 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20889 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
20892 emit_label (final_move_label);
20893 emit_insn (gen_movsi (target,
20894 gen_lowpart (SImode, result_reg)));
20895 emit_label (final_label);
20896 return true;
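/* cmpb semantics underlying the cleanup above: each result byte is
   0xff where the corresponding source bytes are equal and 0x00 where
   they differ.  Hence ~cmpb (src1, src2) | cmpb (src1, 0) has its
   first nonzero byte at the first position that either differs or is a
   NUL in src1; cntlzd plus the addi-by-8 then give the rotate count
   used to pull that byte out of both operands for the final
   subtract.  */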
20899 /* Expand a block move operation, and return 1 if successful. Return 0
20900 if we should let the compiler generate normal code.
20902 operands[0] is the destination
20903 operands[1] is the source
20904 operands[2] is the length
20905 operands[3] is the alignment */
20907 #define MAX_MOVE_REG 4
20910 expand_block_move (rtx operands[])
20912 rtx orig_dest = operands[0];
20913 rtx orig_src = operands[1];
20914 rtx bytes_rtx = operands[2];
20915 rtx align_rtx = operands[3];
20916 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20917 int align;
20918 int bytes;
20919 int offset;
20920 int move_bytes;
20921 rtx stores[MAX_MOVE_REG];
20922 int num_reg = 0;
20924 /* If this is not a fixed size move, just call memcpy */
20925 if (! constp)
20926 return 0;
20928 /* This must be a fixed size alignment */
20929 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20930 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20932 /* Anything to move? */
20933 bytes = INTVAL (bytes_rtx);
20934 if (bytes <= 0)
20935 return 1;
20937 if (bytes > rs6000_block_move_inline_limit)
20938 return 0;
20940 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20942 union {
20943 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20944 rtx (*mov) (rtx, rtx);
20945 } gen_func;
20946 machine_mode mode = BLKmode;
20947 rtx src, dest;
20949 /* Altivec first, since it will be faster than a string move
20950 when it applies, and usually not significantly larger. */
20951 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20953 move_bytes = 16;
20954 mode = V4SImode;
20955 gen_func.mov = gen_movv4si;
20957 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20959 move_bytes = 8;
20960 mode = V2SImode;
20961 gen_func.mov = gen_movv2si;
20963 else if (TARGET_STRING
20964 && bytes > 24 /* move up to 32 bytes at a time */
20965 && ! fixed_regs[5]
20966 && ! fixed_regs[6]
20967 && ! fixed_regs[7]
20968 && ! fixed_regs[8]
20969 && ! fixed_regs[9]
20970 && ! fixed_regs[10]
20971 && ! fixed_regs[11]
20972 && ! fixed_regs[12])
20974 move_bytes = (bytes > 32) ? 32 : bytes;
20975 gen_func.movmemsi = gen_movmemsi_8reg;
20977 else if (TARGET_STRING
20978 && bytes > 16 /* move up to 24 bytes at a time */
20979 && ! fixed_regs[5]
20980 && ! fixed_regs[6]
20981 && ! fixed_regs[7]
20982 && ! fixed_regs[8]
20983 && ! fixed_regs[9]
20984 && ! fixed_regs[10])
20986 move_bytes = (bytes > 24) ? 24 : bytes;
20987 gen_func.movmemsi = gen_movmemsi_6reg;
20989 else if (TARGET_STRING
20990 && bytes > 8 /* move up to 16 bytes at a time */
20991 && ! fixed_regs[5]
20992 && ! fixed_regs[6]
20993 && ! fixed_regs[7]
20994 && ! fixed_regs[8])
20996 move_bytes = (bytes > 16) ? 16 : bytes;
20997 gen_func.movmemsi = gen_movmemsi_4reg;
20999 else if (bytes >= 8 && TARGET_POWERPC64
21000 && (align >= 64 || !STRICT_ALIGNMENT))
21002 move_bytes = 8;
21003 mode = DImode;
21004 gen_func.mov = gen_movdi;
21005 if (offset == 0 && align < 64)
21007 rtx addr;
21009 /* If the address form is reg+offset with offset not a
21010 multiple of four, reload into reg indirect form here
21011 rather than waiting for reload. This way we get one
21012 reload, not one per load and/or store. */
21013 addr = XEXP (orig_dest, 0);
21014 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21015 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21016 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21018 addr = copy_addr_to_reg (addr);
21019 orig_dest = replace_equiv_address (orig_dest, addr);
21021 addr = XEXP (orig_src, 0);
21022 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21023 && GET_CODE (XEXP (addr, 1)) == CONST_INT
21024 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21026 addr = copy_addr_to_reg (addr);
21027 orig_src = replace_equiv_address (orig_src, addr);
21031 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
21032 { /* move up to 8 bytes at a time */
21033 move_bytes = (bytes > 8) ? 8 : bytes;
21034 gen_func.movmemsi = gen_movmemsi_2reg;
21036 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
21037 { /* move 4 bytes */
21038 move_bytes = 4;
21039 mode = SImode;
21040 gen_func.mov = gen_movsi;
21042 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
21043 { /* move 2 bytes */
21044 move_bytes = 2;
21045 mode = HImode;
21046 gen_func.mov = gen_movhi;
21048 else if (TARGET_STRING && bytes > 1)
21049 { /* move up to 4 bytes at a time */
21050 move_bytes = (bytes > 4) ? 4 : bytes;
21051 gen_func.movmemsi = gen_movmemsi_1reg;
21053 else /* move 1 byte at a time */
21055 move_bytes = 1;
21056 mode = QImode;
21057 gen_func.mov = gen_movqi;
21060 src = adjust_address (orig_src, mode, offset);
21061 dest = adjust_address (orig_dest, mode, offset);
21063 if (mode != BLKmode)
21065 rtx tmp_reg = gen_reg_rtx (mode);
21067 emit_insn ((*gen_func.mov) (tmp_reg, src));
21068 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
21071 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
21073 int i;
21074 for (i = 0; i < num_reg; i++)
21075 emit_insn (stores[i]);
21076 num_reg = 0;
21079 if (mode == BLKmode)
21081 /* Move the address into scratch registers. The movmemsi
21082 patterns require zero offset. */
21083 if (!REG_P (XEXP (src, 0)))
21085 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
21086 src = replace_equiv_address (src, src_reg);
21088 set_mem_size (src, move_bytes);
21090 if (!REG_P (XEXP (dest, 0)))
21092 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
21093 dest = replace_equiv_address (dest, dest_reg);
21095 set_mem_size (dest, move_bytes);
21097 emit_insn ((*gen_func.movmemsi) (dest, src,
21098 GEN_INT (move_bytes & 31),
21099 align_rtx));
21103 return 1;
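/* Usage sketch for the expansion above (assuming a 64-bit target and
   8-byte-aligned operands): a fixed 16-byte copy under
   rs6000_block_move_inline_limit becomes two DImode load/store pairs,
   with the stores buffered in STORES[] so up to MAX_MOVE_REG loads are
   scheduled ahead of the first store.

     void copy16 (char *d, const char *s)
     {
       __builtin_memcpy (d, s, 16);   /* two ld / two std  */
     }
*/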
21107 /* Return a string to perform a load_multiple operation.
21108 operands[0] is the vector.
21109 operands[1] is the source address.
21110 operands[2] is the first destination register. */
21112 const char *
21113 rs6000_output_load_multiple (rtx operands[3])
21115 /* We have to handle the case where the pseudo used to contain the address
21116 is assigned to one of the output registers. */
21117 int i, j;
21118 int words = XVECLEN (operands[0], 0);
21119 rtx xop[10];
21121 if (XVECLEN (operands[0], 0) == 1)
21122 return "lwz %2,0(%1)";
21124 for (i = 0; i < words; i++)
21125 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
21127 if (i == words-1)
21129 xop[0] = GEN_INT (4 * (words-1));
21130 xop[1] = operands[1];
21131 xop[2] = operands[2];
21132 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
21133 return "";
21135 else if (i == 0)
21137 xop[0] = GEN_INT (4 * (words-1));
21138 xop[1] = operands[1];
21139 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
21140 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
21141 return "";
21143 else
21145 for (j = 0; j < words; j++)
21146 if (j != i)
21148 xop[0] = GEN_INT (j * 4);
21149 xop[1] = operands[1];
21150 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
21151 output_asm_insn ("lwz %2,%0(%1)", xop);
21153 xop[0] = GEN_INT (i * 4);
21154 xop[1] = operands[1];
21155 output_asm_insn ("lwz %1,%0(%1)", xop);
21156 return "";
21160 return "lswi %2,%1,%N0";
21164 /* A validation routine: say whether CODE, a condition code, and MODE
21165 match. The other alternatives either don't make sense or should
21166 never be generated. */
21168 void
21169 validate_condition_mode (enum rtx_code code, machine_mode mode)
21171 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
21172 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
21173 && GET_MODE_CLASS (mode) == MODE_CC);
21175 /* These don't make sense. */
21176 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
21177 || mode != CCUNSmode);
21179 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
21180 || mode == CCUNSmode);
21182 gcc_assert (mode == CCFPmode
21183 || (code != ORDERED && code != UNORDERED
21184 && code != UNEQ && code != LTGT
21185 && code != UNGT && code != UNLT
21186 && code != UNGE && code != UNLE));
21188 /* These should never be generated except for
21189 flag_finite_math_only. */
21190 gcc_assert (mode != CCFPmode
21191 || flag_finite_math_only
21192 || (code != LE && code != GE
21193 && code != UNEQ && code != LTGT
21194 && code != UNGT && code != UNLT));
21196 /* These are invalid; the information is not there. */
21197 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
21201 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21202 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21203 not zero, store there the bit offset (counted from the right) where
21204 the single stretch of 1 bits begins; and similarly for B, the bit
21205 offset where it ends. */
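/* Illustrative example (editorial sketch): for MODE = SImode and
MASK = 0x00ffff00, the single run of ones spans bits 8..23, so this
returns true with *E = 8 and *B = 23. A wrap-around SImode mask such as
0xff0000ff is also accepted (*E = 24, *B = 7, so E > B); whether a given
instruction tolerates a wrapping mask is decided by the callers below. */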
21207 bool
21208 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
21210 unsigned HOST_WIDE_INT val = INTVAL (mask);
21211 unsigned HOST_WIDE_INT bit;
21212 int nb, ne;
21213 int n = GET_MODE_PRECISION (mode);
21215 if (mode != DImode && mode != SImode)
21216 return false;
21218 if (INTVAL (mask) >= 0)
21220 bit = val & -val;
21221 ne = exact_log2 (bit);
21222 nb = exact_log2 (val + bit);
21224 else if (val + 1 == 0)
21226 nb = n;
21227 ne = 0;
21229 else if (val & 1)
21231 val = ~val;
21232 bit = val & -val;
21233 nb = exact_log2 (bit);
21234 ne = exact_log2 (val + bit);
21236 else
21238 bit = val & -val;
21239 ne = exact_log2 (bit);
21240 if (val + bit == 0)
21241 nb = n;
21242 else
21243 nb = 0;
21246 nb--;
21248 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
21249 return false;
21251 if (b)
21252 *b = nb;
21253 if (e)
21254 *e = ne;
21256 return true;
21259 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21260 or rldicr instruction, to implement an AND with it in mode MODE. */
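/* Illustrative examples (editorial sketch): in DImode, 0xffff works as
rldicl (ne == 0), 0xffffffff00000000 works as rldicr (nb == 63), and
0xff0 works as rlwinm (nb < 32 and ne <= nb); a wrapping DImode mask
such as 0xff000000000000ff fails all three tests and needs the
two-instruction path in rs6000_emit_2insn_and below. */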
21262 bool
21263 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
21265 int nb, ne;
21267 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21268 return false;
21270 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21271 does not wrap. */
21272 if (mode == DImode)
21273 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
21275 /* For SImode, rlwinm can do everything. */
21276 if (mode == SImode)
21277 return (nb < 32 && ne < 32);
21279 return false;
21282 /* Return the instruction template for an AND with mask in mode MODE, with
21283 operands OPERANDS. If DOT is true, make it a record-form instruction. */
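/* For instance (editorial sketch), with operands[2] = 0x0ff0 the mask
runs from bit 11 down to bit 4, so the rlwinm branch below produces
"rlwinm %0,%1,0,20,27" (MB = 31 - 11 = 20, ME = 31 - 4 = 27). */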
21285 const char *
21286 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
21288 int nb, ne;
21290 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
21291 gcc_unreachable ();
21293 if (mode == DImode && ne == 0)
21295 operands[3] = GEN_INT (63 - nb);
21296 if (dot)
21297 return "rldicl. %0,%1,0,%3";
21298 return "rldicl %0,%1,0,%3";
21301 if (mode == DImode && nb == 63)
21303 operands[3] = GEN_INT (63 - ne);
21304 if (dot)
21305 return "rldicr. %0,%1,0,%3";
21306 return "rldicr %0,%1,0,%3";
21309 if (nb < 32 && ne < 32)
21311 operands[3] = GEN_INT (31 - nb);
21312 operands[4] = GEN_INT (31 - ne);
21313 if (dot)
21314 return "rlwinm. %0,%1,0,%3,%4";
21315 return "rlwinm %0,%1,0,%3,%4";
21318 gcc_unreachable ();
21321 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21322 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21323 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
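/* Illustrative example (editorial sketch): in SImode, an ASHIFT by 8
combined with the mask 0xffffff00 is accepted (nb = 31, ne = 8, sh = 8),
since the pair maps onto the single insn "rlwinm d,s,8,0,23". The same
mask with a variable shift count is rejected: as the checks below spell
out, only rotates may take a variable count. */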
21325 bool
21326 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
21328 int nb, ne;
21330 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21331 return false;
21333 int n = GET_MODE_PRECISION (mode);
21334 int sh = -1;
21336 if (CONST_INT_P (XEXP (shift, 1)))
21338 sh = INTVAL (XEXP (shift, 1));
21339 if (sh < 0 || sh >= n)
21340 return false;
21343 rtx_code code = GET_CODE (shift);
21345 /* Convert any shift by 0 to a rotate, to simplify below code. */
21346 if (sh == 0)
21347 code = ROTATE;
21349 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21350 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21351 code = ASHIFT;
21352 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21354 code = LSHIFTRT;
21355 sh = n - sh;
21358 /* DImode rotates need rld*. */
21359 if (mode == DImode && code == ROTATE)
21360 return (nb == 63 || ne == 0 || ne == sh);
21362 /* SImode rotates need rlw*. */
21363 if (mode == SImode && code == ROTATE)
21364 return (nb < 32 && ne < 32 && sh < 32);
21366 /* Wrap-around masks are only okay for rotates. */
21367 if (ne > nb)
21368 return false;
21370 /* Variable shifts are only okay for rotates. */
21371 if (sh < 0)
21372 return false;
21374 /* Don't allow ASHIFT if the mask is wrong for that. */
21375 if (code == ASHIFT && ne < sh)
21376 return false;
21378 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21379 if the mask is wrong for that. */
21380 if (nb < 32 && ne < 32 && sh < 32
21381 && !(code == LSHIFTRT && nb >= 32 - sh))
21382 return true;
21384 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21385 if the mask is wrong for that. */
21386 if (code == LSHIFTRT)
21387 sh = 64 - sh;
21388 if (nb == 63 || ne == 0 || ne == sh)
21389 return !(code == LSHIFTRT && nb >= sh);
21391 return false;
21394 /* Return the instruction template for a shift with mask in mode MODE, with
21395 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21397 const char *
21398 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21400 int nb, ne;
21402 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21403 gcc_unreachable ();
21405 if (mode == DImode && ne == 0)
21407 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21408 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21409 operands[3] = GEN_INT (63 - nb);
21410 if (dot)
21411 return "rld%I2cl. %0,%1,%2,%3";
21412 return "rld%I2cl %0,%1,%2,%3";
21415 if (mode == DImode && nb == 63)
21417 operands[3] = GEN_INT (63 - ne);
21418 if (dot)
21419 return "rld%I2cr. %0,%1,%2,%3";
21420 return "rld%I2cr %0,%1,%2,%3";
21423 if (mode == DImode
21424 && GET_CODE (operands[4]) != LSHIFTRT
21425 && CONST_INT_P (operands[2])
21426 && ne == INTVAL (operands[2]))
21428 operands[3] = GEN_INT (63 - nb);
21429 if (dot)
21430 return "rld%I2c. %0,%1,%2,%3";
21431 return "rld%I2c %0,%1,%2,%3";
21434 if (nb < 32 && ne < 32)
21436 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21437 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21438 operands[3] = GEN_INT (31 - nb);
21439 operands[4] = GEN_INT (31 - ne);
21440 /* This insn can also be a 64-bit rotate with mask that really makes
21441 it just a shift right (with mask); the %h below are to adjust for
21442 that situation (shift count is >= 32 in that case). */
21443 if (dot)
21444 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21445 return "rlw%I2nm %0,%1,%h2,%3,%4";
21448 gcc_unreachable ();
21451 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21452 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21453 ASHIFT, or LSHIFTRT) in mode MODE. */
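/* Illustrative example (editorial sketch): in SImode, a ROTATE by 16
with mask 0xffff0000 inserts the low halfword of the source into the
high halfword of the destination; it is accepted here because nb, ne
and sh are all below 32, and rs6000_insn_for_insert_mask then emits
"rlwimi d,s,16,0,15" for it. */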
21455 bool
21456 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21458 int nb, ne;
21460 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21461 return false;
21463 int n = GET_MODE_PRECISION (mode);
21465 int sh = INTVAL (XEXP (shift, 1));
21466 if (sh < 0 || sh >= n)
21467 return false;
21469 rtx_code code = GET_CODE (shift);
21471 /* Convert any shift by 0 to a rotate, to simplify below code. */
21472 if (sh == 0)
21473 code = ROTATE;
21475 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21476 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21477 code = ASHIFT;
21478 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21480 code = LSHIFTRT;
21481 sh = n - sh;
21484 /* DImode rotates need rldimi. */
21485 if (mode == DImode && code == ROTATE)
21486 return (ne == sh);
21488 /* SImode rotates need rlwimi. */
21489 if (mode == SImode && code == ROTATE)
21490 return (nb < 32 && ne < 32 && sh < 32);
21492 /* Wrap-around masks are only okay for rotates. */
21493 if (ne > nb)
21494 return false;
21496 /* Don't allow ASHIFT if the mask is wrong for that. */
21497 if (code == ASHIFT && ne < sh)
21498 return false;
21500 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21501 if the mask is wrong for that. */
21502 if (nb < 32 && ne < 32 && sh < 32
21503 && !(code == LSHIFTRT && nb >= 32 - sh))
21504 return true;
21506 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21507 if the mask is wrong for that. */
21508 if (code == LSHIFTRT)
21509 sh = 64 - sh;
21510 if (ne == sh)
21511 return !(code == LSHIFTRT && nb >= sh);
21513 return false;
21516 /* Return the instruction template for an insert with mask in mode MODE, with
21517 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21519 const char *
21520 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21522 int nb, ne;
21524 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21525 gcc_unreachable ();
21527 /* Prefer rldimi because rlwimi is cracked. */
21528 if (TARGET_POWERPC64
21529 && (!dot || mode == DImode)
21530 && GET_CODE (operands[4]) != LSHIFTRT
21531 && ne == INTVAL (operands[2]))
21533 operands[3] = GEN_INT (63 - nb);
21534 if (dot)
21535 return "rldimi. %0,%1,%2,%3";
21536 return "rldimi %0,%1,%2,%3";
21539 if (nb < 32 && ne < 32)
21541 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21542 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21543 operands[3] = GEN_INT (31 - nb);
21544 operands[4] = GEN_INT (31 - ne);
21545 if (dot)
21546 return "rlwimi. %0,%1,%2,%3,%4";
21547 return "rlwimi %0,%1,%2,%3,%4";
21550 gcc_unreachable ();
21553 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21554 using two machine instructions. */
21556 bool
21557 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21559 /* There are two kinds of AND we can handle with two insns:
21560 1) those we can do with two rl* insns;
21561 2) ori[s];xori[s].
21563 We do not handle that last case yet. */
21565 /* If there is just one stretch of ones, we can do it. */
21566 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21567 return true;
21569 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21570 one insn, we can do the whole thing with two. */
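/* Worked example (editorial sketch): for val = 0x00ff00ff, bit1 = 0x1
(lowest set bit), bit2 = 0x100 (lowest bit of the hole), val1 =
0x00ff0000 (val with its bottom run of ones cleared), and bit3 = 0x10000
(lowest set bit above the hole). val + bit3 - bit2 = 0x00ffffff fills
the hole with ones; that is a single stretch, so this AND can be done
with two insns. */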
21571 unsigned HOST_WIDE_INT val = INTVAL (c);
21572 unsigned HOST_WIDE_INT bit1 = val & -val;
21573 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21574 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21575 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21576 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
21579 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21580 If EXPAND is true, split rotate-and-mask instructions we generate to
21581 their constituent parts as well (this is used during expand); if DOT
21582 is 1, make the last insn a record-form instruction clobbering the
21583 destination GPR and setting the CC reg (from operands[3]); if 2, set
21584 that GPR as well as the CC reg. */
21586 void
21587 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21589 gcc_assert (!(expand && dot));
21591 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21593 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21594 shift right. This generates better code than doing the masks without
21595 shifts, or shifting first right and then left. */
21596 int nb, ne;
21597 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21599 gcc_assert (mode == DImode);
21601 int shift = 63 - nb;
21602 if (expand)
21604 rtx tmp1 = gen_reg_rtx (DImode);
21605 rtx tmp2 = gen_reg_rtx (DImode);
21606 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21607 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21608 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21610 else
21612 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21613 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21614 emit_move_insn (operands[0], tmp);
21615 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21616 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21618 return;
21621 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21622 that does the rest. */
21623 unsigned HOST_WIDE_INT bit1 = val & -val;
21624 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21625 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21626 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21628 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21629 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
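/* Continuing the 0x00ff00ff example from rs6000_is_valid_2insn_and above
(editorial sketch): mask1 = 0xffffffffffff00ff clears the hole (bits
8..15) and mask2 = 0x00ffffff clears everything above bit 23; since
mask1 & mask2 == val, ANDing with the two masks in sequence implements
the original AND. */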
21631 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21633 /* Two "no-rotate"-and-mask instructions, for SImode. */
21634 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21636 gcc_assert (mode == SImode);
21638 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21639 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21640 emit_move_insn (reg, tmp);
21641 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21642 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21643 return;
21646 gcc_assert (mode == DImode);
21648 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21649 insns; we have to do the first in SImode, because it wraps. */
21650 if (mask2 <= 0xffffffff
21651 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21653 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21654 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21655 GEN_INT (mask1));
21656 rtx reg_low = gen_lowpart (SImode, reg);
21657 emit_move_insn (reg_low, tmp);
21658 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21659 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21660 return;
21663 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21664 at the top end), rotate back and clear the other hole. */
21665 int right = exact_log2 (bit3);
21666 int left = 64 - right;
21668 /* Rotate the mask too. */
21669 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
21671 if (expand)
21673 rtx tmp1 = gen_reg_rtx (DImode);
21674 rtx tmp2 = gen_reg_rtx (DImode);
21675 rtx tmp3 = gen_reg_rtx (DImode);
21676 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21677 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21678 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21679 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21681 else
21683 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21684 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21685 emit_move_insn (operands[0], tmp);
21686 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21687 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21688 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21692 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
21693 for lfq and stfq insns iff the registers are hard registers. */
21695 int
21696 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21698 /* We might have been passed a SUBREG. */
21699 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21700 return 0;
21702 /* We might have been passed non floating point registers. */
21703 if (!FP_REGNO_P (REGNO (reg1))
21704 || !FP_REGNO_P (REGNO (reg2)))
21705 return 0;
21707 return (REGNO (reg1) == REGNO (reg2) - 1);
21710 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21711 addr1 and addr2 must be in consecutive memory locations
21712 (addr2 == addr1 + 8). */
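/* Illustrative example (editorial sketch): (mem (plus r3 8)) followed by
(mem (plus r3 16)) qualifies, as does (mem (reg r3)) followed by
(mem (plus r3 8)). A volatile access, differing base registers, or an
offset delta other than 8 disqualifies the pair. */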
21714 int
21715 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21717 rtx addr1, addr2;
21718 unsigned int reg1, reg2;
21719 int offset1, offset2;
21721 /* The mems cannot be volatile. */
21722 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21723 return 0;
21725 addr1 = XEXP (mem1, 0);
21726 addr2 = XEXP (mem2, 0);
21728 /* Extract an offset (if used) from the first addr. */
21729 if (GET_CODE (addr1) == PLUS)
21731 /* If not a REG, return zero. */
21732 if (GET_CODE (XEXP (addr1, 0)) != REG)
21733 return 0;
21734 else
21736 reg1 = REGNO (XEXP (addr1, 0));
21737 /* The offset must be constant! */
21738 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21739 return 0;
21740 offset1 = INTVAL (XEXP (addr1, 1));
21743 else if (GET_CODE (addr1) != REG)
21744 return 0;
21745 else
21747 reg1 = REGNO (addr1);
21748 /* This was a simple (mem (reg)) expression. Offset is 0. */
21749 offset1 = 0;
21752 /* And now for the second addr. */
21753 if (GET_CODE (addr2) == PLUS)
21755 /* If not a REG, return zero. */
21756 if (GET_CODE (XEXP (addr2, 0)) != REG)
21757 return 0;
21758 else
21760 reg2 = REGNO (XEXP (addr2, 0));
21761 /* The offset must be constant. */
21762 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21763 return 0;
21764 offset2 = INTVAL (XEXP (addr2, 1));
21767 else if (GET_CODE (addr2) != REG)
21768 return 0;
21769 else
21771 reg2 = REGNO (addr2);
21772 /* This was a simple (mem (reg)) expression. Offset is 0. */
21773 offset2 = 0;
21776 /* Both of these must have the same base register. */
21777 if (reg1 != reg2)
21778 return 0;
21780 /* The offset for the second addr must be 8 more than the first addr. */
21781 if (offset2 != offset1 + 8)
21782 return 0;
21784 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21785 instructions. */
21786 return 1;
21790 rtx
21791 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21793 static bool eliminated = false;
21794 rtx ret;
21796 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21797 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21798 else
21800 rtx mem = cfun->machine->sdmode_stack_slot;
21801 gcc_assert (mem != NULL_RTX);
21803 if (!eliminated)
21805 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21806 cfun->machine->sdmode_stack_slot = mem;
21807 eliminated = true;
21809 ret = mem;
21812 if (TARGET_DEBUG_ADDR)
21814 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21815 GET_MODE_NAME (mode));
21816 if (!ret)
21817 fprintf (stderr, "\tNULL_RTX\n");
21818 else
21819 debug_rtx (ret);
21822 return ret;
21825 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
21826 need to use DDmode; in all other cases we can use the same mode. */
21827 static machine_mode
21828 rs6000_secondary_memory_needed_mode (machine_mode mode)
21830 if (lra_in_progress && mode == SDmode)
21831 return DDmode;
21832 return mode;
21835 static tree
21836 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21838 /* Don't walk into types. */
21839 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21841 *walk_subtrees = 0;
21842 return NULL_TREE;
21845 switch (TREE_CODE (*tp))
21847 case VAR_DECL:
21848 case PARM_DECL:
21849 case FIELD_DECL:
21850 case RESULT_DECL:
21851 case SSA_NAME:
21852 case REAL_CST:
21853 case MEM_REF:
21854 case VIEW_CONVERT_EXPR:
21855 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21856 return *tp;
21857 break;
21858 default:
21859 break;
21862 return NULL_TREE;
21865 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
21866 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21867 only work on the traditional altivec registers, note if an altivec register
21868 was chosen. */
21870 static enum rs6000_reg_type
21871 register_to_reg_type (rtx reg, bool *is_altivec)
21873 HOST_WIDE_INT regno;
21874 enum reg_class rclass;
21876 if (GET_CODE (reg) == SUBREG)
21877 reg = SUBREG_REG (reg);
21879 if (!REG_P (reg))
21880 return NO_REG_TYPE;
21882 regno = REGNO (reg);
21883 if (regno >= FIRST_PSEUDO_REGISTER)
21885 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21886 return PSEUDO_REG_TYPE;
21888 regno = true_regnum (reg);
21889 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21890 return PSEUDO_REG_TYPE;
21893 gcc_assert (regno >= 0);
21895 if (is_altivec && ALTIVEC_REGNO_P (regno))
21896 *is_altivec = true;
21898 rclass = rs6000_regno_regclass[regno];
21899 return reg_class_to_reg_type[(int)rclass];
21902 /* Helper function to return the cost of adding a TOC entry address. */
21904 static inline int
21905 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21907 int ret;
21909 if (TARGET_CMODEL != CMODEL_SMALL)
21910 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21912 else
21913 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21915 return ret;
21918 /* Helper function for rs6000_secondary_reload to determine whether the memory
21919 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21920 needs reloading. Return negative if the memory is not handled by the memory
21921 helper functions and a different reload method should be tried, 0 if no
21922 additional instructions are needed, and positive to give the extra cost for the
21923 memory. */
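/* For example (editorial sketch): a (pre_inc (reg)) address costs 0 for a
register class whose addr_mask has RELOAD_REG_PRE_INCDEC set, and 1
otherwise, since the update must then be done with a separate add. An
address the helpers cannot fix up at all, such as a PLUS without a base
register, yields -1 and the caller falls back to the default reload
handling. */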
21925 static int
21926 rs6000_secondary_reload_memory (rtx addr,
21927 enum reg_class rclass,
21928 machine_mode mode)
21930 int extra_cost = 0;
21931 rtx reg, and_arg, plus_arg0, plus_arg1;
21932 addr_mask_type addr_mask;
21933 const char *type = NULL;
21934 const char *fail_msg = NULL;
21936 if (GPR_REG_CLASS_P (rclass))
21937 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21939 else if (rclass == FLOAT_REGS)
21940 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21942 else if (rclass == ALTIVEC_REGS)
21943 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21945 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21946 else if (rclass == VSX_REGS)
21947 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21948 & ~RELOAD_REG_AND_M16);
21950 /* If the register allocator hasn't made up its mind yet on the register
21951 class to use, settle on defaults to use. */
21952 else if (rclass == NO_REGS)
21954 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21955 & ~RELOAD_REG_AND_M16);
21957 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21958 addr_mask &= ~(RELOAD_REG_INDEXED
21959 | RELOAD_REG_PRE_INCDEC
21960 | RELOAD_REG_PRE_MODIFY);
21963 else
21964 addr_mask = 0;
21966 /* If the mode isn't valid in this register class, just return now. */
21967 if ((addr_mask & RELOAD_REG_VALID) == 0)
21969 if (TARGET_DEBUG_ADDR)
21971 fprintf (stderr,
21972 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21973 "not valid in class\n",
21974 GET_MODE_NAME (mode), reg_class_names[rclass]);
21975 debug_rtx (addr);
21978 return -1;
21981 switch (GET_CODE (addr))
21983 /* Does the register class support auto update forms for this mode? We
21984 don't need a scratch register, since the powerpc only supports
21985 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21986 case PRE_INC:
21987 case PRE_DEC:
21988 reg = XEXP (addr, 0);
21989 if (!base_reg_operand (addr, GET_MODE (reg)))
21991 fail_msg = "no base register #1";
21992 extra_cost = -1;
21995 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21997 extra_cost = 1;
21998 type = "update";
22000 break;
22002 case PRE_MODIFY:
22003 reg = XEXP (addr, 0);
22004 plus_arg1 = XEXP (addr, 1);
22005 if (!base_reg_operand (reg, GET_MODE (reg))
22006 || GET_CODE (plus_arg1) != PLUS
22007 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
22009 fail_msg = "bad PRE_MODIFY";
22010 extra_cost = -1;
22013 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22015 extra_cost = 1;
22016 type = "update";
22018 break;
22020 /* Do we need to simulate AND -16 to clear the bottom address bits used
22021 in VMX load/stores? Only allow the AND for vector sizes. */
22022 case AND:
22023 and_arg = XEXP (addr, 0);
22024 if (GET_MODE_SIZE (mode) != 16
22025 || GET_CODE (XEXP (addr, 1)) != CONST_INT
22026 || INTVAL (XEXP (addr, 1)) != -16)
22028 fail_msg = "bad Altivec AND #1";
22029 extra_cost = -1;
22032 if (rclass != ALTIVEC_REGS)
22034 if (legitimate_indirect_address_p (and_arg, false))
22035 extra_cost = 1;
22037 else if (legitimate_indexed_address_p (and_arg, false))
22038 extra_cost = 2;
22040 else
22042 fail_msg = "bad Altivec AND #2";
22043 extra_cost = -1;
22046 type = "and";
22048 break;
22050 /* If this is an indirect address, make sure it is a base register. */
22051 case REG:
22052 case SUBREG:
22053 if (!legitimate_indirect_address_p (addr, false))
22055 extra_cost = 1;
22056 type = "move";
22058 break;
22060 /* If this is an indexed address, make sure the register class can handle
22061 indexed addresses for this mode. */
22062 case PLUS:
22063 plus_arg0 = XEXP (addr, 0);
22064 plus_arg1 = XEXP (addr, 1);
22066 /* (plus (plus (reg) (constant)) (constant)) is generated during
22067 push_reload processing, so handle it now. */
22068 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
22070 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22072 extra_cost = 1;
22073 type = "offset";
22077 /* (plus (plus (reg) (constant)) (reg)) is also generated during
22078 push_reload processing, so handle it now. */
22079 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
22081 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22083 extra_cost = 1;
22084 type = "indexed #2";
22088 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
22090 fail_msg = "no base register #2";
22091 extra_cost = -1;
22094 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
22096 if ((addr_mask & RELOAD_REG_INDEXED) == 0
22097 || !legitimate_indexed_address_p (addr, false))
22099 extra_cost = 1;
22100 type = "indexed";
22104 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
22105 && CONST_INT_P (plus_arg1))
22107 if (!quad_address_offset_p (INTVAL (plus_arg1)))
22109 extra_cost = 1;
22110 type = "vector d-form offset";
22114 /* Make sure the register class can handle offset addresses. */
22115 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22117 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22119 extra_cost = 1;
22120 type = "offset #2";
22124 else
22126 fail_msg = "bad PLUS";
22127 extra_cost = -1;
22130 break;
22132 case LO_SUM:
22133 /* Quad offsets are restricted and can't handle normal addresses. */
22134 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22136 extra_cost = -1;
22137 type = "vector d-form lo_sum";
22140 else if (!legitimate_lo_sum_address_p (mode, addr, false))
22142 fail_msg = "bad LO_SUM";
22143 extra_cost = -1;
22146 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22148 extra_cost = 1;
22149 type = "lo_sum";
22151 break;
22153 /* Static addresses need to create a TOC entry. */
22154 case CONST:
22155 case SYMBOL_REF:
22156 case LABEL_REF:
22157 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22159 extra_cost = -1;
22160 type = "vector d-form lo_sum #2";
22163 else
22165 type = "address";
22166 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
22168 break;
22170 /* TOC references look like offsetable memory. */
22171 case UNSPEC:
22172 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
22174 fail_msg = "bad UNSPEC";
22175 extra_cost = -1;
22178 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22180 extra_cost = -1;
22181 type = "vector d-form lo_sum #3";
22184 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22186 extra_cost = 1;
22187 type = "toc reference";
22189 break;
22191 default:
22193 fail_msg = "bad address";
22194 extra_cost = -1;
22198 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
22200 if (extra_cost < 0)
22201 fprintf (stderr,
22202 "rs6000_secondary_reload_memory error: mode = %s, "
22203 "class = %s, addr_mask = '%s', %s\n",
22204 GET_MODE_NAME (mode),
22205 reg_class_names[rclass],
22206 rs6000_debug_addr_mask (addr_mask, false),
22207 (fail_msg != NULL) ? fail_msg : "<bad address>");
22209 else
22210 fprintf (stderr,
22211 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22212 "addr_mask = '%s', extra cost = %d, %s\n",
22213 GET_MODE_NAME (mode),
22214 reg_class_names[rclass],
22215 rs6000_debug_addr_mask (addr_mask, false),
22216 extra_cost,
22217 (type) ? type : "<none>");
22219 debug_rtx (addr);
22222 return extra_cost;
22225 /* Helper function for rs6000_secondary_reload to return true if a move to a
22226 different register class is really a simple move. */
22228 static bool
22229 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
22230 enum rs6000_reg_type from_type,
22231 machine_mode mode)
22233 int size = GET_MODE_SIZE (mode);
22235 /* Add support for various direct moves available. In this function, we only
22236 look at cases where we don't need any extra registers, and one or more
22237 simple move insns are issued. Originally, small integers were not allowed
22238 in FPR/VSX registers. Single precision binary floating point is not a simple
22239 move because we need to convert to the single precision memory layout.
22240 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22241 need special direct move handling, which we do not support yet. */
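/* For example (editorial sketch): with TARGET_POWERPC64 and
TARGET_DIRECT_MOVE, a DImode move between a GPR and a VSX register is
simple (a single mtvsrd or mfvsrd). The same move on a 32-bit target is
not: the 64-bit value must be split and glued back together by the
multi-insn paths in rs6000_secondary_reload_direct_move below. */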
22242 if (TARGET_DIRECT_MOVE
22243 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22244 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
22246 if (TARGET_POWERPC64)
22248 /* ISA 2.07: MTVSRD or MFVSRD. */
22249 if (size == 8)
22250 return true;
22252 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22253 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
22254 return true;
22257 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22258 if (TARGET_VSX_SMALL_INTEGER)
22260 if (mode == SImode)
22261 return true;
22263 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
22264 return true;
22267 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22268 if (mode == SDmode)
22269 return true;
22272 /* Power6+: MFTGPR or MFFGPR. */
22273 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
22274 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
22275 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22276 return true;
22278 /* Move to/from SPR. */
22279 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
22280 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
22281 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22282 return true;
22284 return false;
22287 /* Direct move helper function for rs6000_secondary_reload, handle all of the
22288 special direct moves that involve allocating an extra register, return the
22289 insn code of the helper function if there is such a function or
22290 CODE_FOR_nothing if not. */
22292 static bool
22293 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
22294 enum rs6000_reg_type from_type,
22295 machine_mode mode,
22296 secondary_reload_info *sri,
22297 bool altivec_p)
22299 bool ret = false;
22300 enum insn_code icode = CODE_FOR_nothing;
22301 int cost = 0;
22302 int size = GET_MODE_SIZE (mode);
22304 if (TARGET_POWERPC64 && size == 16)
22306 /* Handle moving 128-bit values from GPRs to VSX registers on
22307 ISA 2.07 (power8, power9) when running in 64-bit mode using
22308 XXPERMDI to glue the two 64-bit values back together. */
22309 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22311 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
22312 icode = reg_addr[mode].reload_vsx_gpr;
22315 /* Handle moving 128-bit values from VSX registers to GPRs on
22316 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22317 bottom 64-bit value. */
22318 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22320 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
22321 icode = reg_addr[mode].reload_gpr_vsx;
22325 else if (TARGET_POWERPC64 && mode == SFmode)
22327 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22329 cost = 3; /* xscvdpspn, mfvsrd, and. */
22330 icode = reg_addr[mode].reload_gpr_vsx;
22333 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22335 cost = 2; /* mtvsrwz, xscvspdpn. */
22336 icode = reg_addr[mode].reload_vsx_gpr;
22340 else if (!TARGET_POWERPC64 && size == 8)
22342 /* Handle moving 64-bit values from GPRs to floating point registers on
22343 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22344 32-bit values back together. Altivec register classes must be handled
22345 specially since a different instruction is used, and the secondary
22346 reload support requires a single instruction class in the scratch
22347 register constraint. However, right now TFmode is not allowed in
22348 Altivec registers, so the pattern will never match. */
22349 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
22351 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
22352 icode = reg_addr[mode].reload_fpr_gpr;
22356 if (icode != CODE_FOR_nothing)
22358 ret = true;
22359 if (sri)
22361 sri->icode = icode;
22362 sri->extra_cost = cost;
22366 return ret;
22369 /* Return whether a move between two register classes can be done either
22370 directly (simple move) or via a pattern that uses a single extra temporary
22371 (using ISA 2.07's direct move in this case). */
22373 static bool
22374 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
22375 enum rs6000_reg_type from_type,
22376 machine_mode mode,
22377 secondary_reload_info *sri,
22378 bool altivec_p)
22380 /* Fall back to load/store reloads if either type is not a register. */
22381 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
22382 return false;
22384 /* If we haven't allocated registers yet, assume the move can be done for the
22385 standard register types. */
22386 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
22387 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
22388 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
22389 return true;
22391 /* Moves to the same set of registers is a simple move for non-specialized
22392 registers. */
22393 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
22394 return true;
22396 /* Check whether a simple move can be done directly. */
22397 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22399 if (sri)
22401 sri->icode = CODE_FOR_nothing;
22402 sri->extra_cost = 0;
22404 return true;
22407 /* Now check if we can do it in a few steps. */
22408 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22409 altivec_p);
22412 /* Inform reload about cases where moving X with a mode MODE to a register in
22413 RCLASS requires an extra scratch or immediate register. Return the class
22414 needed for the immediate register.
22416 For VSX and Altivec, we may need a register to convert sp+offset into
22417 reg+sp.
22419 For misaligned 64-bit gpr loads and stores we need a register to
22420 convert an offset address to indirect. */
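/* For example (editorial sketch): loading (mem:DI (plus r1 6)) into a
64-bit GPR passes legitimate_address_p, but ld is a DS-form instruction
whose displacement must be a multiple of 4, so this hook answers with
CODE_FOR_reload_di_load plus a scratch register instead of letting the
whole address be reloaded. */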
22422 static reg_class_t
22423 rs6000_secondary_reload (bool in_p,
22424 rtx x,
22425 reg_class_t rclass_i,
22426 machine_mode mode,
22427 secondary_reload_info *sri)
22429 enum reg_class rclass = (enum reg_class) rclass_i;
22430 reg_class_t ret = ALL_REGS;
22431 enum insn_code icode;
22432 bool default_p = false;
22433 bool done_p = false;
22435 /* Allow subreg of memory before/during reload. */
22436 bool memory_p = (MEM_P (x)
22437 || (!reload_completed && GET_CODE (x) == SUBREG
22438 && MEM_P (SUBREG_REG (x))));
22440 sri->icode = CODE_FOR_nothing;
22441 sri->t_icode = CODE_FOR_nothing;
22442 sri->extra_cost = 0;
22443 icode = ((in_p)
22444 ? reg_addr[mode].reload_load
22445 : reg_addr[mode].reload_store);
22447 if (REG_P (x) || register_operand (x, mode))
22449 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22450 bool altivec_p = (rclass == ALTIVEC_REGS);
22451 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22453 if (!in_p)
22454 std::swap (to_type, from_type);
22456 /* Can we do a direct move of some sort? */
22457 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22458 altivec_p))
22460 icode = (enum insn_code)sri->icode;
22461 default_p = false;
22462 done_p = true;
22463 ret = NO_REGS;
22467 /* Make sure 0.0 is not reloaded or forced into memory. */
22468 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22470 ret = NO_REGS;
22471 default_p = false;
22472 done_p = true;
22475 /* If this is a scalar floating point value and we want to load it into the
22476 traditional Altivec registers, do it by moving through a traditional floating
22477 point register, unless we have D-form addressing. Also make sure that
22478 non-zero constants use a FPR. */
22479 if (!done_p && reg_addr[mode].scalar_in_vmx_p
22480 && !mode_supports_vmx_dform (mode)
22481 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22482 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22484 ret = FLOAT_REGS;
22485 default_p = false;
22486 done_p = true;
22489 /* Handle reload of load/stores if we have reload helper functions. */
22490 if (!done_p && icode != CODE_FOR_nothing && memory_p)
22492 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22493 mode);
22495 if (extra_cost >= 0)
22497 done_p = true;
22498 ret = NO_REGS;
22499 if (extra_cost > 0)
22501 sri->extra_cost = extra_cost;
22502 sri->icode = icode;
22507 /* Handle unaligned loads and stores of integer registers. */
22508 if (!done_p && TARGET_POWERPC64
22509 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22510 && memory_p
22511 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22513 rtx addr = XEXP (x, 0);
22514 rtx off = address_offset (addr);
22516 if (off != NULL_RTX)
22518 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22519 unsigned HOST_WIDE_INT offset = INTVAL (off);
22521 /* We need a secondary reload when our legitimate_address_p
22522 says the address is good (as otherwise the entire address
22523 will be reloaded), and the offset is not a multiple of
22524 four or we have an address wrap. Address wrap will only
22525 occur for LO_SUMs since legitimate_offset_address_p
22526 rejects addresses for 16-byte mems that will wrap. */
22527 if (GET_CODE (addr) == LO_SUM
22528 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22529 && ((offset & 3) != 0
22530 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22531 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22532 && (offset & 3) != 0))
22534 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22535 if (in_p)
22536 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22537 : CODE_FOR_reload_di_load);
22538 else
22539 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22540 : CODE_FOR_reload_di_store);
22541 sri->extra_cost = 2;
22542 ret = NO_REGS;
22543 done_p = true;
22545 else
22546 default_p = true;
22548 else
22549 default_p = true;
22552 if (!done_p && !TARGET_POWERPC64
22553 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22554 && memory_p
22555 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22557 rtx addr = XEXP (x, 0);
22558 rtx off = address_offset (addr);
22560 if (off != NULL_RTX)
22562 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22563 unsigned HOST_WIDE_INT offset = INTVAL (off);
22565 /* We need a secondary reload when our legitimate_address_p
22566 says the address is good (as otherwise the entire address
22567 will be reloaded), and we have a wrap.
22569 legitimate_lo_sum_address_p allows LO_SUM addresses to
22570 have any offset so test for wrap in the low 16 bits.
22572 legitimate_offset_address_p checks for the range
22573 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22574 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22575 [0x7ff4,0x7fff] respectively, so test for the
22576 intersection of these ranges, [0x7ffc,0x7fff] and
22577 [0x7ff4,0x7ff7] respectively.
22579 Note that the address we see here may have been
22580 manipulated by legitimize_reload_address. */
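/* Worked example (editorial sketch): an 8-byte access on a 32-bit target
gives extra = 4. At offset 0x7ffc the first word is reachable, but the
second word would need offset 0x8000, which no longer fits the 16-bit
displacement; 0x7ffc - (0x8000 - 4) = 0 < UNITS_PER_WORD, so the test
below requests the secondary reload. */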
22581 if (GET_CODE (addr) == LO_SUM
22582 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22583 : offset - (0x8000 - extra) < UNITS_PER_WORD)
22585 if (in_p)
22586 sri->icode = CODE_FOR_reload_si_load;
22587 else
22588 sri->icode = CODE_FOR_reload_si_store;
22589 sri->extra_cost = 2;
22590 ret = NO_REGS;
22591 done_p = true;
22593 else
22594 default_p = true;
22596 else
22597 default_p = true;
22600 if (!done_p)
22601 default_p = true;
22603 if (default_p)
22604 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22606 gcc_assert (ret != ALL_REGS);
22608 if (TARGET_DEBUG_ADDR)
22610 fprintf (stderr,
22611 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22612 "mode = %s",
22613 reg_class_names[ret],
22614 in_p ? "true" : "false",
22615 reg_class_names[rclass],
22616 GET_MODE_NAME (mode));
22618 if (reload_completed)
22619 fputs (", after reload", stderr);
22621 if (!done_p)
22622 fputs (", done_p not set", stderr);
22624 if (default_p)
22625 fputs (", default secondary reload", stderr);
22627 if (sri->icode != CODE_FOR_nothing)
22628 fprintf (stderr, ", reload func = %s, extra cost = %d",
22629 insn_data[sri->icode].name, sri->extra_cost);
22631 else if (sri->extra_cost > 0)
22632 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22634 fputs ("\n", stderr);
22635 debug_rtx (x);
22638 return ret;
22641 /* Better tracing for rs6000_secondary_reload_inner. */
22643 static void
22644 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22645 bool store_p)
22647 rtx set, clobber;
22649 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22651 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22652 store_p ? "store" : "load");
22654 if (store_p)
22655 set = gen_rtx_SET (mem, reg);
22656 else
22657 set = gen_rtx_SET (reg, mem);
22659 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22660 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22663 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22664 ATTRIBUTE_NORETURN;
22666 static void
22667 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22668 bool store_p)
22670 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22671 gcc_unreachable ();
22674 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22675 reload helper functions. These were identified in
22676 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22677 reload, it calls the insns:
22678 reload_<RELOAD:mode>_<P:mptrsize>_store
22679 reload_<RELOAD:mode>_<P:mptrsize>_load
22681 which in turn calls this function, to do whatever is necessary to create
22682 valid addresses. */
22684 void
22685 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22687 int regno = true_regnum (reg);
22688 machine_mode mode = GET_MODE (reg);
22689 addr_mask_type addr_mask;
22690 rtx addr;
22691 rtx new_addr;
22692 rtx op_reg, op0, op1;
22693 rtx and_op;
22694 rtx cc_clobber;
22695 rtvec rv;
22697 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22698 || !base_reg_operand (scratch, GET_MODE (scratch)))
22699 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22701 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22702 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22704 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22705 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22707 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22708 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22710 else
22711 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22713 /* Make sure the mode is valid in this register class. */
22714 if ((addr_mask & RELOAD_REG_VALID) == 0)
22715 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22717 if (TARGET_DEBUG_ADDR)
22718 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22720 new_addr = addr = XEXP (mem, 0);
22721 switch (GET_CODE (addr))
22723 /* Does the register class support auto update forms for this mode? If
22724 not, do the update now. We don't need a scratch register, since the
22725 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22726 case PRE_INC:
22727 case PRE_DEC:
22728 op_reg = XEXP (addr, 0);
22729 if (!base_reg_operand (op_reg, Pmode))
22730 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22732 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22734 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
22735 new_addr = op_reg;
22737 break;
22739 case PRE_MODIFY:
22740 op0 = XEXP (addr, 0);
22741 op1 = XEXP (addr, 1);
22742 if (!base_reg_operand (op0, Pmode)
22743 || GET_CODE (op1) != PLUS
22744 || !rtx_equal_p (op0, XEXP (op1, 0)))
22745 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22747 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22749 emit_insn (gen_rtx_SET (op0, op1));
22750 new_addr = reg;
22752 break;
22754 /* Do we need to simulate AND -16 to clear the bottom address bits used
22755 in VMX load/stores? */
22756 case AND:
22757 op0 = XEXP (addr, 0);
22758 op1 = XEXP (addr, 1);
22759 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22761 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22762 op_reg = op0;
22764 else if (GET_CODE (op1) == PLUS)
22766 emit_insn (gen_rtx_SET (scratch, op1));
22767 op_reg = scratch;
22770 else
22771 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22773 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22774 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22775 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22776 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22777 new_addr = scratch;
22779 break;
22781 /* If this is an indirect address, make sure it is a base register. */
22782 case REG:
22783 case SUBREG:
22784 if (!base_reg_operand (addr, GET_MODE (addr)))
22786 emit_insn (gen_rtx_SET (scratch, addr));
22787 new_addr = scratch;
22789 break;
22791 /* If this is an indexed address, make sure the register class can handle
22792 indexed addresses for this mode. */
22793 case PLUS:
22794 op0 = XEXP (addr, 0);
22795 op1 = XEXP (addr, 1);
22796 if (!base_reg_operand (op0, Pmode))
22797 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22799 else if (int_reg_operand (op1, Pmode))
22801 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22803 emit_insn (gen_rtx_SET (scratch, addr));
22804 new_addr = scratch;
22808 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22810 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22811 || !quad_address_p (addr, mode, false))
22813 emit_insn (gen_rtx_SET (scratch, addr));
22814 new_addr = scratch;
22818 /* Make sure the register class can handle offset addresses. */
22819 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22821 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22823 emit_insn (gen_rtx_SET (scratch, addr));
22824 new_addr = scratch;
22828 else
22829 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22831 break;
22833 case LO_SUM:
22834 op0 = XEXP (addr, 0);
22835 op1 = XEXP (addr, 1);
22836 if (!base_reg_operand (op0, Pmode))
22837 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22839 else if (int_reg_operand (op1, Pmode))
22841 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22843 emit_insn (gen_rtx_SET (scratch, addr));
22844 new_addr = scratch;
22848 /* Quad offsets are restricted and can't handle normal addresses. */
22849 else if (mode_supports_vsx_dform_quad (mode))
22851 emit_insn (gen_rtx_SET (scratch, addr));
22852 new_addr = scratch;
22855 /* Make sure the register class can handle offset addresses. */
22856 else if (legitimate_lo_sum_address_p (mode, addr, false))
22858 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22860 emit_insn (gen_rtx_SET (scratch, addr));
22861 new_addr = scratch;
22865 else
22866 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22868 break;
22870 case SYMBOL_REF:
22871 case CONST:
22872 case LABEL_REF:
22873 rs6000_emit_move (scratch, addr, Pmode);
22874 new_addr = scratch;
22875 break;
22877 default:
22878 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22881 /* Adjust the address if it changed. */
22882 if (addr != new_addr)
22884 mem = replace_equiv_address_nv (mem, new_addr);
22885 if (TARGET_DEBUG_ADDR)
22886 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22889 /* Now create the move. */
22890 if (store_p)
22891 emit_insn (gen_rtx_SET (mem, reg));
22892 else
22893 emit_insn (gen_rtx_SET (reg, mem));
22895 return;
22898 /* Convert reloads involving 64-bit gprs and misaligned offset
22899 addressing, or multiple 32-bit gprs and offsets that are too large,
22900 to use indirect addressing. */
22902 void
22903 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22905 int regno = true_regnum (reg);
22906 enum reg_class rclass;
22907 rtx addr;
22908 rtx scratch_or_premodify = scratch;
22910 if (TARGET_DEBUG_ADDR)
22912 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22913 store_p ? "store" : "load");
22914 fprintf (stderr, "reg:\n");
22915 debug_rtx (reg);
22916 fprintf (stderr, "mem:\n");
22917 debug_rtx (mem);
22918 fprintf (stderr, "scratch:\n");
22919 debug_rtx (scratch);
22922 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22923 gcc_assert (GET_CODE (mem) == MEM);
22924 rclass = REGNO_REG_CLASS (regno);
22925 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22926 addr = XEXP (mem, 0);
22928 if (GET_CODE (addr) == PRE_MODIFY)
22930 gcc_assert (REG_P (XEXP (addr, 0))
22931 && GET_CODE (XEXP (addr, 1)) == PLUS
22932 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22933 scratch_or_premodify = XEXP (addr, 0);
22934 if (!HARD_REGISTER_P (scratch_or_premodify))
22935 /* If we have a pseudo here then reload will have arranged
22936 to have it replaced, but only in the original insn.
22937 Use the replacement here too. */
22938 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22940 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22941 expressions from the original insn, without unsharing them.
22942 Any RTL that points into the original insn will of course
22943 have register replacements applied. That is why we don't
22944 need to look for replacements under the PLUS. */
22945 addr = XEXP (addr, 1);
22947 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22949 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22951 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22953 /* Now create the move. */
22954 if (store_p)
22955 emit_insn (gen_rtx_SET (mem, reg));
22956 else
22957 emit_insn (gen_rtx_SET (reg, mem));
22959 return;
22962 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22963 this function has any SDmode references. If we are on a power7 or later, we
22964 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22965 can load/store the value. */
22967 static void
22968 rs6000_alloc_sdmode_stack_slot (void)
22970 tree t;
22971 basic_block bb;
22972 gimple_stmt_iterator gsi;
22974 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22975 /* We use a different approach for dealing with the secondary
22976 memory in LRA. */
22977 if (ira_use_lra_p)
22978 return;
22980 if (TARGET_NO_SDMODE_STACK)
22981 return;
22983 FOR_EACH_BB_FN (bb, cfun)
22984 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22986 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22987 if (ret)
22989 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22990 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22991 SDmode, 0);
22992 return;
22996 /* Check for any SDmode parameters of the function. */
22997 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22999 if (TREE_TYPE (t) == error_mark_node)
23000 continue;
23002 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
23003 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
23005 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
23006 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
23007 SDmode, 0);
23008 return;
23013 static void
23014 rs6000_instantiate_decls (void)
23016 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
23017 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
23020 /* Given an rtx X being reloaded into a reg required to be
23021 in class CLASS, return the class of reg to actually use.
23022 In general this is just CLASS; but on some machines
23023 in some cases it is preferable to use a more restrictive class.
23025 On the RS/6000, we have to return NO_REGS when we want to reload a
23026 floating-point CONST_DOUBLE to force it to be copied to memory.
23028 We also don't want to reload integer values into floating-point
23029 registers if we can at all help it. In fact, this can
23030 cause reload to die, if it tries to generate a reload of CTR
23031 into a FP register and discovers it doesn't have the memory location
23032 required.
23034 ??? Would it be a good idea to have reload do the converse, that is
23035 try to reload floating modes into FP registers if possible? */
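/* For example (editorial sketch): under VSX, an easy vector constant
requested in VSX_REGS is steered to ALTIVEC_REGS (where the splat insns
live), while a nonzero SFmode CONST_DOUBLE gets NO_REGS so that it is
forced to memory. */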
23038 static enum reg_class
23039 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
23041 machine_mode mode = GET_MODE (x);
23042 bool is_constant = CONSTANT_P (x);
23044 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
23045 reload class for it. */
23046 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23047 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
23048 return NO_REGS;
23050 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
23051 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
23052 return NO_REGS;
23054 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
23055 the reloading of address expressions using PLUS into floating point
23056 registers. */
23057 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
23059 if (is_constant)
23061 /* Zero is always allowed in all VSX registers. */
23062 if (x == CONST0_RTX (mode))
23063 return rclass;
23065 /* If this is a vector constant that can be formed with a few Altivec
23066 instructions, we want altivec registers. */
23067 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
23068 return ALTIVEC_REGS;
23070 /* If this is an integer constant that can easily be loaded into
23071 vector registers, allow it. */
23072 if (CONST_INT_P (x))
23074 HOST_WIDE_INT value = INTVAL (x);
23076 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
23077 2.06 can generate it in the Altivec registers with
23078 VSPLTI<x>. */
23079 if (value == -1)
23081 if (TARGET_P8_VECTOR)
23082 return rclass;
23083 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23084 return ALTIVEC_REGS;
23085 else
23086 return NO_REGS;
23089 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
23090 a sign extend in the Altivec registers. */
23091 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
23092 && TARGET_VSX_SMALL_INTEGER
23093 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
23094 return ALTIVEC_REGS;
23097 /* Force constant to memory. */
23098 return NO_REGS;
23101 /* D-form addressing can easily reload the value. */
23102 if (mode_supports_vmx_dform (mode)
23103 || mode_supports_vsx_dform_quad (mode))
23104 return rclass;
23106 /* If this is a scalar floating point value and we don't have D-form
23107 addressing, prefer the traditional floating point registers so that we
23108 can use D-form (register+offset) addressing. */
23109 if (rclass == VSX_REGS
23110 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
23111 return FLOAT_REGS;
23113 /* Prefer the Altivec registers if Altivec is handling the vector
23114 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
23115 loads. */
23116 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
23117 || mode == V1TImode)
23118 return ALTIVEC_REGS;
23120 return rclass;
23123 if (is_constant || GET_CODE (x) == PLUS)
23125 if (reg_class_subset_p (GENERAL_REGS, rclass))
23126 return GENERAL_REGS;
23127 if (reg_class_subset_p (BASE_REGS, rclass))
23128 return BASE_REGS;
23129 return NO_REGS;
23132 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
23133 return GENERAL_REGS;
23135 return rclass;
23138 /* Debug version of rs6000_preferred_reload_class. */
23139 static enum reg_class
23140 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
23142 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
23144 fprintf (stderr,
23145 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23146 "mode = %s, x:\n",
23147 reg_class_names[ret], reg_class_names[rclass],
23148 GET_MODE_NAME (GET_MODE (x)));
23149 debug_rtx (x);
23151 return ret;
23154 /* If we are copying between FP or AltiVec registers and anything else, we need
23155 a memory location. The exception is when we are targeting ppc64 and the
23156 direct move instructions between FPRs and GPRs are available.  Also, under VSX, you
23157 can copy vector registers from the FP register set to the Altivec register
23158 set and vice versa. */
23160 static bool
23161 rs6000_secondary_memory_needed (machine_mode mode,
23162 reg_class_t from_class,
23163 reg_class_t to_class)
23165 enum rs6000_reg_type from_type, to_type;
23166 bool altivec_p = ((from_class == ALTIVEC_REGS)
23167 || (to_class == ALTIVEC_REGS));
23169 /* If a simple/direct move is available, we don't need secondary memory.  */
23170 from_type = reg_class_to_reg_type[(int)from_class];
23171 to_type = reg_class_to_reg_type[(int)to_class];
23173 if (rs6000_secondary_reload_move (to_type, from_type, mode,
23174 (secondary_reload_info *)0, altivec_p))
23175 return false;
23177 /* If we have a floating point or vector register class, we need to use
23178 memory to transfer the data. */
23179 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
23180 return true;
23182 return false;
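/* Editorial sketch, not part of this file: the predicate above reduces to
   "no direct move exists AND at least one side lives in a FP/vector
   register file".  The helper below is hypothetical and only restates
   that shape.  */

static int
example_needs_secondary_memory (int direct_move_ok,
				int from_is_fp_or_vec,
				int to_is_fp_or_vec)
{
  if (direct_move_ok)
    return 0;			/* mfvsrd/mtvsrd and friends handle it.  */
  return from_is_fp_or_vec || to_is_fp_or_vec;
}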
23185 /* Debug version of rs6000_secondary_memory_needed. */
23186 static bool
23187 rs6000_debug_secondary_memory_needed (machine_mode mode,
23188 reg_class_t from_class,
23189 reg_class_t to_class)
23191 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
23193 fprintf (stderr,
23194 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23195 "to_class = %s, mode = %s\n",
23196 ret ? "true" : "false",
23197 reg_class_names[from_class],
23198 reg_class_names[to_class],
23199 GET_MODE_NAME (mode));
23201 return ret;
23204 /* Return the register class of a scratch register needed to copy IN into
23205 or out of a register in RCLASS in MODE. If it can be done directly,
23206 NO_REGS is returned. */
23208 static enum reg_class
23209 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
23210 rtx in)
23212 int regno;
23214 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
23215 #if TARGET_MACHO
23216 && MACHOPIC_INDIRECT
23217 #endif
23220 /* We cannot copy a symbolic operand directly into anything
23221 other than BASE_REGS for TARGET_ELF. So indicate that a
23222 register from BASE_REGS is needed as an intermediate
23223 register.
23225 On Darwin, pic addresses require a load from memory, which
23226 needs a base register. */
23227 if (rclass != BASE_REGS
23228 && (GET_CODE (in) == SYMBOL_REF
23229 || GET_CODE (in) == HIGH
23230 || GET_CODE (in) == LABEL_REF
23231 || GET_CODE (in) == CONST))
23232 return BASE_REGS;
23235 if (GET_CODE (in) == REG)
23237 regno = REGNO (in);
23238 if (regno >= FIRST_PSEUDO_REGISTER)
23240 regno = true_regnum (in);
23241 if (regno >= FIRST_PSEUDO_REGISTER)
23242 regno = -1;
23245 else if (GET_CODE (in) == SUBREG)
23247 regno = true_regnum (in);
23248 if (regno >= FIRST_PSEUDO_REGISTER)
23249 regno = -1;
23251 else
23252 regno = -1;
23254 /* If we have VSX register moves, prefer moving scalar values between
23255 Altivec registers and GPR by going via an FPR (and then via memory)
23256 instead of reloading the secondary memory address for Altivec moves. */
23257 if (TARGET_VSX
23258 && GET_MODE_SIZE (mode) < 16
23259 && !mode_supports_vmx_dform (mode)
23260 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
23261 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
23262 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
23263 && (regno >= 0 && INT_REGNO_P (regno)))))
23264 return FLOAT_REGS;
23266 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23267 into anything. */
23268 if (rclass == GENERAL_REGS || rclass == BASE_REGS
23269 || (regno >= 0 && INT_REGNO_P (regno)))
23270 return NO_REGS;
23272 /* Constants, memory, and VSX registers can go into VSX registers (both the
23273 traditional floating point and the altivec registers). */
23274 if (rclass == VSX_REGS
23275 && (regno == -1 || VSX_REGNO_P (regno)))
23276 return NO_REGS;
23278 /* Constants, memory, and FP registers can go into FP registers. */
23279 if ((regno == -1 || FP_REGNO_P (regno))
23280 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
23281 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
23283 /* Memory, and AltiVec registers can go into AltiVec registers. */
23284 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
23285 && rclass == ALTIVEC_REGS)
23286 return NO_REGS;
23288 /* We can copy among the CR registers. */
23289 if ((rclass == CR_REGS || rclass == CR0_REGS)
23290 && regno >= 0 && CR_REGNO_P (regno))
23291 return NO_REGS;
23293 /* Otherwise, we need GENERAL_REGS. */
23294 return GENERAL_REGS;
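/* Editorial sketch, not part of this file: the REGNO normalization at the
   top of rs6000_secondary_reload_class.  Hard registers keep their number,
   pseudos are resolved through true_regnum, and anything unresolved
   becomes -1, which the class tests above treat as "could be anything".
   The helper name is hypothetical.  */

static int
example_normalize_regno (int raw_regno, int first_pseudo, int resolved)
{
  if (raw_regno < first_pseudo)
    return raw_regno;		/* Already a hard register.  */
  if (resolved >= 0 && resolved < first_pseudo)
    return resolved;		/* Pseudo assigned to a hard register.  */
  return -1;			/* Unknown; treat conservatively.  */
}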
23297 /* Debug version of rs6000_secondary_reload_class. */
23298 static enum reg_class
23299 rs6000_debug_secondary_reload_class (enum reg_class rclass,
23300 machine_mode mode, rtx in)
23302 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
23303 fprintf (stderr,
23304 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23305 "mode = %s, input rtx:\n",
23306 reg_class_names[ret], reg_class_names[rclass],
23307 GET_MODE_NAME (mode));
23308 debug_rtx (in);
23310 return ret;
23313 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
23315 static bool
23316 rs6000_can_change_mode_class (machine_mode from,
23317 machine_mode to,
23318 reg_class_t rclass)
23320 unsigned from_size = GET_MODE_SIZE (from);
23321 unsigned to_size = GET_MODE_SIZE (to);
23323 if (from_size != to_size)
23325 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
23327 if (reg_classes_intersect_p (xclass, rclass))
23329 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
23330 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
23331 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
23332 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
23334 /* Don't allow 64-bit types to overlap with 128-bit types that take a
23335 single register under VSX because the scalar part of the register
23336 is in the upper 64-bits, and not the lower 64-bits. Types like
23337 TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
23338 IEEE floating point can't overlap, and neither can small
23339 values. */
23341 if (to_float128_vector_p && from_float128_vector_p)
23342 return true;
23344 else if (to_float128_vector_p || from_float128_vector_p)
23345 return false;
23347 /* TDmode in floating-mode registers must always go into a register
23348 pair with the most significant word in the even-numbered register
23349 to match ISA requirements. In little-endian mode, this does not
23350 match subreg numbering, so we cannot allow subregs. */
23351 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
23352 return false;
23354 if (from_size < 8 || to_size < 8)
23355 return false;
23357 if (from_size == 8 && (8 * to_nregs) != to_size)
23358 return false;
23360 if (to_size == 8 && (8 * from_nregs) != from_size)
23361 return false;
23363 return true;
23365 else
23366 return true;
23369 if (TARGET_E500_DOUBLE
23370 && ((((to) == DFmode) + ((from) == DFmode)) == 1
23371 || (((to) == TFmode) + ((from) == TFmode)) == 1
23372 || (((to) == IFmode) + ((from) == IFmode)) == 1
23373 || (((to) == KFmode) + ((from) == KFmode)) == 1
23374 || (((to) == DDmode) + ((from) == DDmode)) == 1
23375 || (((to) == TDmode) + ((from) == TDmode)) == 1
23376 || (((to) == DImode) + ((from) == DImode)) == 1))
23377 return false;
23379 /* Since the VSX register set includes traditional floating point registers
23380 and altivec registers, just check for the size being different instead of
23381 trying to check whether the modes are vector modes. Otherwise it won't
23382 allow, say, DF and DI to change classes.  For types like TFmode and TDmode
23383 that take 2 64-bit registers, rather than a single 128-bit register, don't
23384 allow subregs of those types to other 128 bit types. */
23385 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
23387 unsigned num_regs = (from_size + 15) / 16;
23388 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
23389 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
23390 return false;
23392 return (from_size == 8 || from_size == 16);
23395 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
23396 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23397 return false;
23399 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23400 && reg_classes_intersect_p (GENERAL_REGS, rclass))
23401 return false;
23403 return true;
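/* Editorial sketch, not part of this file: the VSX size check above.  A
   subreg-style mode change is allowed only when both modes occupy the
   same number of 16-byte VSX registers; the rounding below mirrors
   (from_size + 15) / 16.  The helper name is hypothetical.  */

static int
example_vsx_mode_change_ok (unsigned from_size, unsigned to_nregs,
			    unsigned from_nregs)
{
  unsigned num_regs = (from_size + 15) / 16;
  if (to_nregs > num_regs || from_nregs > num_regs)
    return 0;
  return from_size == 8 || from_size == 16;
}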
23406 /* Debug version of rs6000_can_change_mode_class. */
23407 static bool
23408 rs6000_debug_can_change_mode_class (machine_mode from,
23409 machine_mode to,
23410 reg_class_t rclass)
23412 bool ret = rs6000_can_change_mode_class (from, to, rclass);
23414 fprintf (stderr,
23415 "rs6000_can_change_mode_class, return %s, from = %s, "
23416 "to = %s, rclass = %s\n",
23417 ret ? "true" : "false",
23418 GET_MODE_NAME (from), GET_MODE_NAME (to),
23419 reg_class_names[rclass]);
23421 return ret;
23424 /* Return a string to do a move operation of 128 bits of data. */
23426 const char *
23427 rs6000_output_move_128bit (rtx operands[])
23429 rtx dest = operands[0];
23430 rtx src = operands[1];
23431 machine_mode mode = GET_MODE (dest);
23432 int dest_regno;
23433 int src_regno;
23434 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23435 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23437 if (REG_P (dest))
23439 dest_regno = REGNO (dest);
23440 dest_gpr_p = INT_REGNO_P (dest_regno);
23441 dest_fp_p = FP_REGNO_P (dest_regno);
23442 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23443 dest_vsx_p = dest_fp_p | dest_vmx_p;
23445 else
23447 dest_regno = -1;
23448 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23451 if (REG_P (src))
23453 src_regno = REGNO (src);
23454 src_gpr_p = INT_REGNO_P (src_regno);
23455 src_fp_p = FP_REGNO_P (src_regno);
23456 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23457 src_vsx_p = src_fp_p | src_vmx_p;
23459 else
23461 src_regno = -1;
23462 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23465 /* Register moves. */
23466 if (dest_regno >= 0 && src_regno >= 0)
23468 if (dest_gpr_p)
23470 if (src_gpr_p)
23471 return "#";
23473 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23474 return (WORDS_BIG_ENDIAN
23475 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23476 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23478 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23479 return "#";
23482 else if (TARGET_VSX && dest_vsx_p)
23484 if (src_vsx_p)
23485 return "xxlor %x0,%x1,%x1";
23487 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23488 return (WORDS_BIG_ENDIAN
23489 ? "mtvsrdd %x0,%1,%L1"
23490 : "mtvsrdd %x0,%L1,%1");
23492 else if (TARGET_DIRECT_MOVE && src_gpr_p)
23493 return "#";
23496 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23497 return "vor %0,%1,%1";
23499 else if (dest_fp_p && src_fp_p)
23500 return "#";
23503 /* Loads. */
23504 else if (dest_regno >= 0 && MEM_P (src))
23506 if (dest_gpr_p)
23508 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23509 return "lq %0,%1";
23510 else
23511 return "#";
23514 else if (TARGET_ALTIVEC && dest_vmx_p
23515 && altivec_indexed_or_indirect_operand (src, mode))
23516 return "lvx %0,%y1";
23518 else if (TARGET_VSX && dest_vsx_p)
23520 if (mode_supports_vsx_dform_quad (mode)
23521 && quad_address_p (XEXP (src, 0), mode, true))
23522 return "lxv %x0,%1";
23524 else if (TARGET_P9_VECTOR)
23525 return "lxvx %x0,%y1";
23527 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23528 return "lxvw4x %x0,%y1";
23530 else
23531 return "lxvd2x %x0,%y1";
23534 else if (TARGET_ALTIVEC && dest_vmx_p)
23535 return "lvx %0,%y1";
23537 else if (dest_fp_p)
23538 return "#";
23541 /* Stores. */
23542 else if (src_regno >= 0 && MEM_P (dest))
23544 if (src_gpr_p)
23546 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23547 return "stq %1,%0";
23548 else
23549 return "#";
23552 else if (TARGET_ALTIVEC && src_vmx_p
23553 && altivec_indexed_or_indirect_operand (src, mode))
23554 return "stvx %1,%y0";
23556 else if (TARGET_VSX && src_vsx_p)
23558 if (mode_supports_vsx_dform_quad (mode)
23559 && quad_address_p (XEXP (dest, 0), mode, true))
23560 return "stxv %x1,%0";
23562 else if (TARGET_P9_VECTOR)
23563 return "stxvx %x1,%y0";
23565 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23566 return "stxvw4x %x1,%y0";
23568 else
23569 return "stxvd2x %x1,%y0";
23572 else if (TARGET_ALTIVEC && src_vmx_p)
23573 return "stvx %1,%y0";
23575 else if (src_fp_p)
23576 return "#";
23579 /* Constants. */
23580 else if (dest_regno >= 0
23581 && (GET_CODE (src) == CONST_INT
23582 || GET_CODE (src) == CONST_WIDE_INT
23583 || GET_CODE (src) == CONST_DOUBLE
23584 || GET_CODE (src) == CONST_VECTOR))
23586 if (dest_gpr_p)
23587 return "#";
23589 else if ((dest_vmx_p && TARGET_ALTIVEC)
23590 || (dest_vsx_p && TARGET_VSX))
23591 return output_vec_const_move (operands);
23594 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
23597 /* Validate a 128-bit move. */
23598 bool
23599 rs6000_move_128bit_ok_p (rtx operands[])
23601 machine_mode mode = GET_MODE (operands[0]);
23602 return (gpc_reg_operand (operands[0], mode)
23603 || gpc_reg_operand (operands[1], mode));
23606 /* Return true if a 128-bit move needs to be split. */
23607 bool
23608 rs6000_split_128bit_ok_p (rtx operands[])
23610 if (!reload_completed)
23611 return false;
23613 if (!gpr_or_gpr_p (operands[0], operands[1]))
23614 return false;
23616 if (quad_load_store_p (operands[0], operands[1]))
23617 return false;
23619 return true;
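/* Editorial sketch, not part of this file: the three conditions above in
   one line.  A 128-bit GPR move is split into word-sized moves only after
   reload, only when both operands are GPRs or memory, and not when a
   single lq/stq can do the job.  The helper name is hypothetical.  */

static int
example_should_split_128bit (int reload_done, int gpr_or_mem,
			     int quad_insn_ok)
{
  return reload_done && gpr_or_mem && !quad_insn_ok;
}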
23623 /* Given a comparison operation, return the bit number in CCR to test. We
23624 know this is a valid comparison.
23626 SCC_P is 1 if this is for an scc. That means that %D will have been
23627 used instead of %C, so the bits will be in different places.
23629 Return -1 if OP isn't a valid comparison for some reason. */
23631 int
23632 ccr_bit (rtx op, int scc_p)
23634 enum rtx_code code = GET_CODE (op);
23635 machine_mode cc_mode;
23636 int cc_regnum;
23637 int base_bit;
23638 rtx reg;
23640 if (!COMPARISON_P (op))
23641 return -1;
23643 reg = XEXP (op, 0);
23645 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23647 cc_mode = GET_MODE (reg);
23648 cc_regnum = REGNO (reg);
23649 base_bit = 4 * (cc_regnum - CR0_REGNO);
23651 validate_condition_mode (code, cc_mode);
23653 /* When generating a sCOND operation, only positive conditions are
23654 allowed. */
23655 gcc_assert (!scc_p
23656 || code == EQ || code == GT || code == LT || code == UNORDERED
23657 || code == GTU || code == LTU);
23659 switch (code)
23661 case NE:
23662 return scc_p ? base_bit + 3 : base_bit + 2;
23663 case EQ:
23664 return base_bit + 2;
23665 case GT: case GTU: case UNLE:
23666 return base_bit + 1;
23667 case LT: case LTU: case UNGE:
23668 return base_bit;
23669 case ORDERED: case UNORDERED:
23670 return base_bit + 3;
23672 case GE: case GEU:
23673 /* If scc, we will have done a cror to put the bit in the
23674 unordered position. So test that bit. For integer, this is ! LT
23675 unless this is an scc insn. */
23676 return scc_p ? base_bit + 3 : base_bit;
23678 case LE: case LEU:
23679 return scc_p ? base_bit + 3 : base_bit + 1;
23681 default:
23682 gcc_unreachable ();
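/* Editorial sketch, not part of this file: the bit arithmetic behind
   ccr_bit.  Each CR field is 4 bits wide, with LT at offset 0, GT at 1,
   EQ at 2 and SO/UN at 3, so e.g. the GT bit of CR2 is 4*2 + 1 = 9.
   The helper name is hypothetical.  */

static int
example_ccr_bit (int cr_field, int within_field)
{
  return 4 * cr_field + within_field;	/* cr_field 0..7, within 0..3.  */
}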
23686 /* Return the GOT register. */
23688 rtx
23689 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23691 /* The second flow pass currently (June 1999) can't update
23692 regs_ever_live without disturbing other parts of the compiler, so
23693 update it here to make the prolog/epilogue code happy. */
23694 if (!can_create_pseudo_p ()
23695 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23696 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23698 crtl->uses_pic_offset_table = 1;
23700 return pic_offset_table_rtx;
23703 static rs6000_stack_t stack_info;
23705 /* Function to init struct machine_function.
23706 This will be called, via a pointer variable,
23707 from push_function_context. */
23709 static struct machine_function *
23710 rs6000_init_machine_status (void)
23712 stack_info.reload_completed = 0;
23713 return ggc_cleared_alloc<machine_function> ();
23716 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23718 /* Write out a function code label. */
23720 void
23721 rs6000_output_function_entry (FILE *file, const char *fname)
23723 if (fname[0] != '.')
23725 switch (DEFAULT_ABI)
23727 default:
23728 gcc_unreachable ();
23730 case ABI_AIX:
23731 if (DOT_SYMBOLS)
23732 putc ('.', file);
23733 else
23734 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23735 break;
23737 case ABI_ELFv2:
23738 case ABI_V4:
23739 case ABI_DARWIN:
23740 break;
23744 RS6000_OUTPUT_BASENAME (file, fname);
23747 /* Print an operand. Recognize special options, documented below. */
23749 #if TARGET_ELF
23750 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23751 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23752 #else
23753 #define SMALL_DATA_RELOC "sda21"
23754 #define SMALL_DATA_REG 0
23755 #endif
23757 void
23758 print_operand (FILE *file, rtx x, int code)
23760 int i;
23761 unsigned HOST_WIDE_INT uval;
23763 switch (code)
23765 /* %a is output_address. */
23767 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23768 output_operand. */
23770 case 'D':
23771 /* Like 'J' but get to the GT bit only. */
23772 gcc_assert (REG_P (x));
23774 /* Bit 1 is GT bit. */
23775 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23777 /* Add one for shift count in rlinm for scc. */
23778 fprintf (file, "%d", i + 1);
23779 return;
23781 case 'e':
23782 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23783 if (! INT_P (x))
23785 output_operand_lossage ("invalid %%e value");
23786 return;
23789 uval = INTVAL (x);
23790 if ((uval & 0xffff) == 0 && uval != 0)
23791 putc ('s', file);
23792 return;
23794 case 'E':
23795 /* X is a CR register.  Print the number of the EQ bit of the CR.  */
23796 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23797 output_operand_lossage ("invalid %%E value");
23798 else
23799 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23800 return;
23802 case 'f':
23803 /* X is a CR register. Print the shift count needed to move it
23804 to the high-order four bits. */
23805 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23806 output_operand_lossage ("invalid %%f value");
23807 else
23808 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23809 return;
23811 case 'F':
23812 /* Similar, but print the count for the rotate in the opposite
23813 direction. */
23814 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23815 output_operand_lossage ("invalid %%F value");
23816 else
23817 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23818 return;
23820 case 'G':
23821 /* X is a constant integer. If it is negative, print "m",
23822 otherwise print "z". This is to make an aze or ame insn. */
23823 if (GET_CODE (x) != CONST_INT)
23824 output_operand_lossage ("invalid %%G value");
23825 else if (INTVAL (x) >= 0)
23826 putc ('z', file);
23827 else
23828 putc ('m', file);
23829 return;
23831 case 'h':
23832 /* If constant, output low-order five bits. Otherwise, write
23833 normally. */
23834 if (INT_P (x))
23835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23836 else
23837 print_operand (file, x, 0);
23838 return;
23840 case 'H':
23841 /* If constant, output low-order six bits. Otherwise, write
23842 normally. */
23843 if (INT_P (x))
23844 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23845 else
23846 print_operand (file, x, 0);
23847 return;
23849 case 'I':
23850 /* Print `i' if this is a constant, else nothing. */
23851 if (INT_P (x))
23852 putc ('i', file);
23853 return;
23855 case 'j':
23856 /* Write the bit number in CCR for jump. */
23857 i = ccr_bit (x, 0);
23858 if (i == -1)
23859 output_operand_lossage ("invalid %%j code");
23860 else
23861 fprintf (file, "%d", i);
23862 return;
23864 case 'J':
23865 /* Similar, but add one for shift count in rlinm for scc and pass
23866 scc flag to `ccr_bit'. */
23867 i = ccr_bit (x, 1);
23868 if (i == -1)
23869 output_operand_lossage ("invalid %%J code");
23870 else
23871 /* If we want bit 31, write a shift count of zero, not 32. */
23872 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23873 return;
23875 case 'k':
23876 /* X must be a constant. Write the 1's complement of the
23877 constant. */
23878 if (! INT_P (x))
23879 output_operand_lossage ("invalid %%k value");
23880 else
23881 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23882 return;
23884 case 'K':
23885 /* X must be a symbolic constant on ELF. Write an
23886 expression suitable for an 'addi' that adds in the low 16
23887 bits of the MEM. */
23888 if (GET_CODE (x) == CONST)
23890 if (GET_CODE (XEXP (x, 0)) != PLUS
23891 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23892 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23893 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23894 output_operand_lossage ("invalid %%K value");
23896 print_operand_address (file, x);
23897 fputs ("@l", file);
23898 return;
23900 /* %l is output_asm_label. */
23902 case 'L':
23903 /* Write second word of DImode or DFmode reference. Works on register
23904 or non-indexed memory only. */
23905 if (REG_P (x))
23906 fputs (reg_names[REGNO (x) + 1], file);
23907 else if (MEM_P (x))
23909 machine_mode mode = GET_MODE (x);
23910 /* Handle possible auto-increment. Since it is pre-increment and
23911 we have already done it, we can just use an offset of one word.  */
23912 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23913 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23914 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23915 UNITS_PER_WORD));
23916 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23917 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23918 UNITS_PER_WORD));
23919 else
23920 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23921 UNITS_PER_WORD),
23922 0));
23924 if (small_data_operand (x, GET_MODE (x)))
23925 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23926 reg_names[SMALL_DATA_REG]);
23928 return;
23930 case 'N':
23931 /* Write the number of elements in the vector times 4. */
23932 if (GET_CODE (x) != PARALLEL)
23933 output_operand_lossage ("invalid %%N value");
23934 else
23935 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23936 return;
23938 case 'O':
23939 /* Similar, but subtract 1 first. */
23940 if (GET_CODE (x) != PARALLEL)
23941 output_operand_lossage ("invalid %%O value");
23942 else
23943 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23944 return;
23946 case 'p':
23947 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23948 if (! INT_P (x)
23949 || INTVAL (x) < 0
23950 || (i = exact_log2 (INTVAL (x))) < 0)
23951 output_operand_lossage ("invalid %%p value");
23952 else
23953 fprintf (file, "%d", i);
23954 return;
23956 case 'P':
23957 /* The operand must be an indirect memory reference. The result
23958 is the register name. */
23959 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23960 || REGNO (XEXP (x, 0)) >= 32)
23961 output_operand_lossage ("invalid %%P value");
23962 else
23963 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23964 return;
23966 case 'q':
23967 /* This outputs the logical code corresponding to a boolean
23968 expression. The expression may have one or both operands
23969 negated (if one, only the first one). For condition register
23970 logical operations, it will also treat the negated
23971 CR codes as NOTs, but not handle NOTs of them. */
23973 const char *const *t = 0;
23974 const char *s;
23975 enum rtx_code code = GET_CODE (x);
23976 static const char * const tbl[3][3] = {
23977 { "and", "andc", "nor" },
23978 { "or", "orc", "nand" },
23979 { "xor", "eqv", "xor" } };
23981 if (code == AND)
23982 t = tbl[0];
23983 else if (code == IOR)
23984 t = tbl[1];
23985 else if (code == XOR)
23986 t = tbl[2];
23987 else
23988 output_operand_lossage ("invalid %%q value");
23990 if (GET_CODE (XEXP (x, 0)) != NOT)
23991 s = t[0];
23992 else
23994 if (GET_CODE (XEXP (x, 1)) == NOT)
23995 s = t[2];
23996 else
23997 s = t[1];
24000 fputs (s, file);
24002 return;
24004 case 'Q':
24005 if (! TARGET_MFCRF)
24006 return;
24007 fputc (',', file);
24008 /* FALLTHRU */
24010 case 'R':
24011 /* X is a CR register. Print the mask for `mtcrf'. */
24012 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
24013 output_operand_lossage ("invalid %%R value");
24014 else
24015 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
24016 return;
24018 case 's':
24019 /* Low 5 bits of 32 - value.  */
24020 if (! INT_P (x))
24021 output_operand_lossage ("invalid %%s value");
24022 else
24023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
24024 return;
24026 case 't':
24027 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
24028 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
24030 /* Bit 3 is OV bit. */
24031 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
24033 /* If we want bit 31, write a shift count of zero, not 32. */
24034 fprintf (file, "%d", i == 31 ? 0 : i + 1);
24035 return;
24037 case 'T':
24038 /* Print the symbolic name of a branch target register. */
24039 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
24040 && REGNO (x) != CTR_REGNO))
24041 output_operand_lossage ("invalid %%T value");
24042 else if (REGNO (x) == LR_REGNO)
24043 fputs ("lr", file);
24044 else
24045 fputs ("ctr", file);
24046 return;
24048 case 'u':
24049 /* High-order or low-order 16 bits of constant, whichever is non-zero,
24050 for use in unsigned operand. */
24051 if (! INT_P (x))
24053 output_operand_lossage ("invalid %%u value");
24054 return;
24057 uval = INTVAL (x);
24058 if ((uval & 0xffff) == 0)
24059 uval >>= 16;
24061 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
24062 return;
24064 case 'v':
24065 /* High-order 16 bits of constant for use in signed operand. */
24066 if (! INT_P (x))
24067 output_operand_lossage ("invalid %%v value");
24068 else
24069 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
24070 (INTVAL (x) >> 16) & 0xffff);
24071 return;
24073 case 'U':
24074 /* Print `u' if this has an auto-increment or auto-decrement. */
24075 if (MEM_P (x)
24076 && (GET_CODE (XEXP (x, 0)) == PRE_INC
24077 || GET_CODE (XEXP (x, 0)) == PRE_DEC
24078 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
24079 putc ('u', file);
24080 return;
24082 case 'V':
24083 /* Print the trap code for this operand. */
24084 switch (GET_CODE (x))
24086 case EQ:
24087 fputs ("eq", file); /* 4 */
24088 break;
24089 case NE:
24090 fputs ("ne", file); /* 24 */
24091 break;
24092 case LT:
24093 fputs ("lt", file); /* 16 */
24094 break;
24095 case LE:
24096 fputs ("le", file); /* 20 */
24097 break;
24098 case GT:
24099 fputs ("gt", file); /* 8 */
24100 break;
24101 case GE:
24102 fputs ("ge", file); /* 12 */
24103 break;
24104 case LTU:
24105 fputs ("llt", file); /* 2 */
24106 break;
24107 case LEU:
24108 fputs ("lle", file); /* 6 */
24109 break;
24110 case GTU:
24111 fputs ("lgt", file); /* 1 */
24112 break;
24113 case GEU:
24114 fputs ("lge", file); /* 5 */
24115 break;
24116 default:
24117 gcc_unreachable ();
24119 break;
24121 case 'w':
24122 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
24123 normally. */
24124 if (INT_P (x))
24125 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
24126 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
24127 else
24128 print_operand (file, x, 0);
24129 return;
24131 case 'x':
24132 /* X is a FPR or Altivec register used in a VSX context. */
24133 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
24134 output_operand_lossage ("invalid %%x value");
24135 else
24137 int reg = REGNO (x);
24138 int vsx_reg = (FP_REGNO_P (reg)
24139 ? reg - 32
24140 : reg - FIRST_ALTIVEC_REGNO + 32);
24142 #ifdef TARGET_REGNAMES
24143 if (TARGET_REGNAMES)
24144 fprintf (file, "%%vs%d", vsx_reg);
24145 else
24146 #endif
24147 fprintf (file, "%d", vsx_reg);
24149 return;
24151 case 'X':
24152 if (MEM_P (x)
24153 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
24154 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
24155 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
24156 putc ('x', file);
24157 return;
24159 case 'Y':
24160 /* Like 'L', for the third word of TImode/PTImode.  */
24161 if (REG_P (x))
24162 fputs (reg_names[REGNO (x) + 2], file);
24163 else if (MEM_P (x))
24165 machine_mode mode = GET_MODE (x);
24166 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24167 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24168 output_address (mode, plus_constant (Pmode,
24169 XEXP (XEXP (x, 0), 0), 8));
24170 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24171 output_address (mode, plus_constant (Pmode,
24172 XEXP (XEXP (x, 0), 0), 8));
24173 else
24174 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
24175 if (small_data_operand (x, GET_MODE (x)))
24176 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24177 reg_names[SMALL_DATA_REG]);
24179 return;
24181 case 'z':
24182 /* X is a SYMBOL_REF. Write out the name preceded by a
24183 period and without any trailing data in brackets. Used for function
24184 names. If we are configured for System V (or the embedded ABI) on
24185 the PowerPC, do not emit the period, since those systems do not use
24186 TOCs and the like. */
24187 gcc_assert (GET_CODE (x) == SYMBOL_REF);
24189 /* For macho, check to see if we need a stub. */
24190 if (TARGET_MACHO)
24192 const char *name = XSTR (x, 0);
24193 #if TARGET_MACHO
24194 if (darwin_emit_branch_islands
24195 && MACHOPIC_INDIRECT
24196 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
24197 name = machopic_indirection_name (x, /*stub_p=*/true);
24198 #endif
24199 assemble_name (file, name);
24201 else if (!DOT_SYMBOLS)
24202 assemble_name (file, XSTR (x, 0));
24203 else
24204 rs6000_output_function_entry (file, XSTR (x, 0));
24205 return;
24207 case 'Z':
24208 /* Like 'L', for last word of TImode/PTImode. */
24209 if (REG_P (x))
24210 fputs (reg_names[REGNO (x) + 3], file);
24211 else if (MEM_P (x))
24213 machine_mode mode = GET_MODE (x);
24214 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24215 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24216 output_address (mode, plus_constant (Pmode,
24217 XEXP (XEXP (x, 0), 0), 12));
24218 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24219 output_address (mode, plus_constant (Pmode,
24220 XEXP (XEXP (x, 0), 0), 12));
24221 else
24222 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
24223 if (small_data_operand (x, GET_MODE (x)))
24224 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24225 reg_names[SMALL_DATA_REG]);
24227 return;
24229 /* Print AltiVec or SPE memory operand. */
24230 case 'y':
24232 rtx tmp;
24234 gcc_assert (MEM_P (x));
24236 tmp = XEXP (x, 0);
24238 /* Ugly hack because %y is overloaded. */
24239 if ((TARGET_SPE || TARGET_E500_DOUBLE)
24240 && (GET_MODE_SIZE (GET_MODE (x)) == 8
24241 || FLOAT128_2REG_P (GET_MODE (x))
24242 || GET_MODE (x) == TImode
24243 || GET_MODE (x) == PTImode))
24245 /* Handle [reg]. */
24246 if (REG_P (tmp))
24248 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
24249 break;
24251 /* Handle [reg+UIMM]. */
24252 else if (GET_CODE (tmp) == PLUS
24253 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
24255 int x;
24257 gcc_assert (REG_P (XEXP (tmp, 0)));
24259 x = INTVAL (XEXP (tmp, 1));
24260 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
24261 break;
24264 /* Fall through. Must be [reg+reg]. */
24266 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
24267 && GET_CODE (tmp) == AND
24268 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
24269 && INTVAL (XEXP (tmp, 1)) == -16)
24270 tmp = XEXP (tmp, 0);
24271 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
24272 && GET_CODE (tmp) == PRE_MODIFY)
24273 tmp = XEXP (tmp, 1);
24274 if (REG_P (tmp))
24275 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
24276 else
24278 if (GET_CODE (tmp) != PLUS
24279 || !REG_P (XEXP (tmp, 0))
24280 || !REG_P (XEXP (tmp, 1)))
24282 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24283 break;
24286 if (REGNO (XEXP (tmp, 0)) == 0)
24287 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
24288 reg_names[ REGNO (XEXP (tmp, 0)) ]);
24289 else
24290 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
24291 reg_names[ REGNO (XEXP (tmp, 1)) ]);
24293 break;
24296 case 0:
24297 if (REG_P (x))
24298 fprintf (file, "%s", reg_names[REGNO (x)]);
24299 else if (MEM_P (x))
24301 /* We need to handle PRE_INC and PRE_DEC here, since we need to
24302 know the width from the mode. */
24303 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
24304 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
24305 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24306 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
24307 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
24308 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24309 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24310 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
24311 else
24312 output_address (GET_MODE (x), XEXP (x, 0));
24314 else
24316 if (toc_relative_expr_p (x, false))
24317 /* This hack along with a corresponding hack in
24318 rs6000_output_addr_const_extra arranges to output addends
24319 where the assembler expects to find them, e.g.
24320 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24321 without this hack would be output as "x@toc+4". We
24322 want "x+4@toc". */
24323 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24324 else
24325 output_addr_const (file, x);
24327 return;
24329 case '&':
24330 if (const char *name = get_some_local_dynamic_name ())
24331 assemble_name (file, name);
24332 else
24333 output_operand_lossage ("'%%&' used without any "
24334 "local dynamic TLS references");
24335 return;
24337 default:
24338 output_operand_lossage ("invalid %%xn code");
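/* Editorial sketch, not part of this file: the sign-extension idiom used
   by the 'w' case above.  Masking to 16 bits, XOR-ing with 0x8000 and
   subtracting 0x8000 maps the low halfword onto [-32768, 32767] without
   relying on implementation-defined right shifts; e.g. 0xffff -> -1 and
   0x7fff -> 32767.  The helper name is hypothetical.  */

static long
example_sext16 (unsigned long v)
{
  long x = (long) (v & 0xffff);	/* 0 .. 65535, always fits in long.  */
  return (x ^ 0x8000) - 0x8000;	/* -32768 .. 32767.  */
}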
24342 /* Print the address of an operand. */
24344 void
24345 print_operand_address (FILE *file, rtx x)
24347 if (REG_P (x))
24348 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
24349 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
24350 || GET_CODE (x) == LABEL_REF)
24352 output_addr_const (file, x);
24353 if (small_data_operand (x, GET_MODE (x)))
24354 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24355 reg_names[SMALL_DATA_REG]);
24356 else
24357 gcc_assert (!TARGET_TOC);
24359 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24360 && REG_P (XEXP (x, 1)))
24362 if (REGNO (XEXP (x, 0)) == 0)
24363 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
24364 reg_names[ REGNO (XEXP (x, 0)) ]);
24365 else
24366 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
24367 reg_names[ REGNO (XEXP (x, 1)) ]);
24369 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24370 && GET_CODE (XEXP (x, 1)) == CONST_INT)
24371 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
24372 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
24373 #if TARGET_MACHO
24374 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24375 && CONSTANT_P (XEXP (x, 1)))
24377 fprintf (file, "lo16(");
24378 output_addr_const (file, XEXP (x, 1));
24379 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24381 #endif
24382 #if TARGET_ELF
24383 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24384 && CONSTANT_P (XEXP (x, 1)))
24386 output_addr_const (file, XEXP (x, 1));
24387 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24389 #endif
24390 else if (toc_relative_expr_p (x, false))
24392 /* This hack along with a corresponding hack in
24393 rs6000_output_addr_const_extra arranges to output addends
24394 where the assembler expects to find them, e.g.
24395 (lo_sum (reg 9)
24396 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24397 without this hack would be output as "x@toc+8@l(9)". We
24398 want "x+8@toc@l(9)". */
24399 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24400 if (GET_CODE (x) == LO_SUM)
24401 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24402 else
24403 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24405 else
24406 gcc_unreachable ();
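/* Editorial sketch, not part of this file: the r0 quirk in the indexed
   forms above.  In rA,rB addressing the hardware reads rA == r0 as the
   literal value zero, so whenever register 0 appears it must be printed
   as the second (index) operand.  Hypothetical helper returning which
   register to print first:  */

static int
example_first_index_reg (int ra, int rb)
{
  return ra == 0 ? rb : ra;	/* Never let r0 be the base register.  */
}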
24409 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
24411 static bool
24412 rs6000_output_addr_const_extra (FILE *file, rtx x)
24414 if (GET_CODE (x) == UNSPEC)
24415 switch (XINT (x, 1))
24417 case UNSPEC_TOCREL:
24418 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24419 && REG_P (XVECEXP (x, 0, 1))
24420 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24421 output_addr_const (file, XVECEXP (x, 0, 0));
24422 if (x == tocrel_base && tocrel_offset != const0_rtx)
24424 if (INTVAL (tocrel_offset) >= 0)
24425 fprintf (file, "+");
24426 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24428 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24430 putc ('-', file);
24431 assemble_name (file, toc_label_name);
24432 need_toc_init = 1;
24434 else if (TARGET_ELF)
24435 fputs ("@toc", file);
24436 return true;
24438 #if TARGET_MACHO
24439 case UNSPEC_MACHOPIC_OFFSET:
24440 output_addr_const (file, XVECEXP (x, 0, 0));
24441 putc ('-', file);
24442 machopic_output_function_base_name (file);
24443 return true;
24444 #endif
24446 return false;
24449 /* Target hook for assembling integer objects. The PowerPC version has
24450 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24451 is defined. It also needs to handle DI-mode objects on 64-bit
24452 targets. */
24454 static bool
24455 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24457 #ifdef RELOCATABLE_NEEDS_FIXUP
24458 /* Special handling for SI values. */
24459 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24461 static int recurse = 0;
24463 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24464 the .fixup section. Since the TOC section is already relocated, we
24465 don't need to mark it here. We used to skip the text section, but it
24466 should never be valid for relocated addresses to be placed in the text
24467 section. */
24468 if (DEFAULT_ABI == ABI_V4
24469 && (TARGET_RELOCATABLE || flag_pic > 1)
24470 && in_section != toc_section
24471 && !recurse
24472 && !CONST_SCALAR_INT_P (x)
24473 && CONSTANT_P (x))
24475 char buf[256];
24477 recurse = 1;
24478 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24479 fixuplabelno++;
24480 ASM_OUTPUT_LABEL (asm_out_file, buf);
24481 fprintf (asm_out_file, "\t.long\t(");
24482 output_addr_const (asm_out_file, x);
24483 fprintf (asm_out_file, ")@fixup\n");
24484 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24485 ASM_OUTPUT_ALIGN (asm_out_file, 2);
24486 fprintf (asm_out_file, "\t.long\t");
24487 assemble_name (asm_out_file, buf);
24488 fprintf (asm_out_file, "\n\t.previous\n");
24489 recurse = 0;
24490 return true;
24492 /* Remove initial .'s to turn a -mcall-aixdesc function
24493 address into the address of the descriptor, not the function
24494 itself. */
24495 else if (GET_CODE (x) == SYMBOL_REF
24496 && XSTR (x, 0)[0] == '.'
24497 && DEFAULT_ABI == ABI_AIX)
24499 const char *name = XSTR (x, 0);
24500 while (*name == '.')
24501 name++;
24503 fprintf (asm_out_file, "\t.long\t%s\n", name);
24504 return true;
24507 #endif /* RELOCATABLE_NEEDS_FIXUP */
24508 return default_assemble_integer (x, size, aligned_p);
24511 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24512 /* Emit an assembler directive to set symbol visibility for DECL to
24513 VISIBILITY_TYPE. */
24515 static void
24516 rs6000_assemble_visibility (tree decl, int vis)
24518 if (TARGET_XCOFF)
24519 return;
24521 /* Functions need to have their entry point symbol visibility set as
24522 well as their descriptor symbol visibility. */
24523 if (DEFAULT_ABI == ABI_AIX
24524 && DOT_SYMBOLS
24525 && TREE_CODE (decl) == FUNCTION_DECL)
24527 static const char * const visibility_types[] = {
24528 NULL, "protected", "hidden", "internal"
24531 const char *name, *type;
24533 name = ((* targetm.strip_name_encoding)
24534 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24535 type = visibility_types[vis];
24537 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24538 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24540 else
24541 default_assemble_visibility (decl, vis);
24543 #endif
24545 enum rtx_code
24546 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24548 /* Reversal of FP compares takes care -- an ordered compare
24549 becomes an unordered compare and vice versa. */
24550 if (mode == CCFPmode
24551 && (!flag_finite_math_only
24552 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24553 || code == UNEQ || code == LTGT))
24554 return reverse_condition_maybe_unordered (code);
24555 else
24556 return reverse_condition (code);
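/* Editorial sketch, not part of this file: why FP reversal needs care.
   With NaNs possible, !(a < b) is "a >= b or unordered", i.e. UNGE rather
   than GE, which is what reverse_condition_maybe_unordered produces;
   plain reverse_condition is only safe once -ffinite-math-only rules
   NaNs out.  The enum and helper below are hypothetical.  */

enum example_cc { EX_LT, EX_GT, EX_EQ, EX_NE, EX_UNGE, EX_UNLE };

static enum example_cc
example_reverse_fp (enum example_cc c)
{
  switch (c)
    {
    case EX_LT:   return EX_UNGE;	/* !(a < b) includes unordered.  */
    case EX_GT:   return EX_UNLE;
    case EX_EQ:   return EX_NE;
    case EX_NE:   return EX_EQ;
    case EX_UNGE: return EX_LT;
    case EX_UNLE: return EX_GT;
    }
  return c;
}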
24559 /* Generate a compare for CODE. Return a brand-new rtx that
24560 represents the result of the compare. */
24562 static rtx
24563 rs6000_generate_compare (rtx cmp, machine_mode mode)
24565 machine_mode comp_mode;
24566 rtx compare_result;
24567 enum rtx_code code = GET_CODE (cmp);
24568 rtx op0 = XEXP (cmp, 0);
24569 rtx op1 = XEXP (cmp, 1);
24571 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24572 comp_mode = CCmode;
24573 else if (FLOAT_MODE_P (mode))
24574 comp_mode = CCFPmode;
24575 else if (code == GTU || code == LTU
24576 || code == GEU || code == LEU)
24577 comp_mode = CCUNSmode;
24578 else if ((code == EQ || code == NE)
24579 && unsigned_reg_p (op0)
24580 && (unsigned_reg_p (op1)
24581 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24582 /* These are unsigned values; perhaps there will be a later
24583 ordering compare that can be shared with this one. */
24584 comp_mode = CCUNSmode;
24585 else
24586 comp_mode = CCmode;
24588 /* If we have an unsigned compare, make sure we don't have a signed value as
24589 an immediate. */
24590 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24591 && INTVAL (op1) < 0)
24593 op0 = copy_rtx_if_shared (op0);
24594 op1 = force_reg (GET_MODE (op0), op1);
24595 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24598 /* First, the compare. */
24599 compare_result = gen_reg_rtx (comp_mode);
24601 /* E500 FP compare instructions on the GPRs. Yuck! */
24602 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24603 && FLOAT_MODE_P (mode))
24605 rtx cmp, or_result, compare_result2;
24606 machine_mode op_mode = GET_MODE (op0);
24607 bool reverse_p;
24609 if (op_mode == VOIDmode)
24610 op_mode = GET_MODE (op1);
24612 /* First reverse the condition codes that aren't directly supported. */
24613 switch (code)
24615 case NE:
24616 case UNLT:
24617 case UNLE:
24618 case UNGT:
24619 case UNGE:
24620 code = reverse_condition_maybe_unordered (code);
24621 reverse_p = true;
24622 break;
24624 case EQ:
24625 case LT:
24626 case LE:
24627 case GT:
24628 case GE:
24629 reverse_p = false;
24630 break;
24632 default:
24633 gcc_unreachable ();
24636 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24637 This explains the following mess. */
24639 switch (code)
24641 case EQ:
24642 switch (op_mode)
24644 case E_SFmode:
24645 cmp = (flag_finite_math_only && !flag_trapping_math)
24646 ? gen_tstsfeq_gpr (compare_result, op0, op1)
24647 : gen_cmpsfeq_gpr (compare_result, op0, op1);
24648 break;
24650 case E_DFmode:
24651 cmp = (flag_finite_math_only && !flag_trapping_math)
24652 ? gen_tstdfeq_gpr (compare_result, op0, op1)
24653 : gen_cmpdfeq_gpr (compare_result, op0, op1);
24654 break;
24656 case E_TFmode:
24657 case E_IFmode:
24658 case E_KFmode:
24659 cmp = (flag_finite_math_only && !flag_trapping_math)
24660 ? gen_tsttfeq_gpr (compare_result, op0, op1)
24661 : gen_cmptfeq_gpr (compare_result, op0, op1);
24662 break;
24664 default:
24665 gcc_unreachable ();
24667 break;
24669 case GT:
24670 case GE:
24671 switch (op_mode)
24673 case E_SFmode:
24674 cmp = (flag_finite_math_only && !flag_trapping_math)
24675 ? gen_tstsfgt_gpr (compare_result, op0, op1)
24676 : gen_cmpsfgt_gpr (compare_result, op0, op1);
24677 break;
24679 case E_DFmode:
24680 cmp = (flag_finite_math_only && !flag_trapping_math)
24681 ? gen_tstdfgt_gpr (compare_result, op0, op1)
24682 : gen_cmpdfgt_gpr (compare_result, op0, op1);
24683 break;
24685 case E_TFmode:
24686 case E_IFmode:
24687 case E_KFmode:
24688 cmp = (flag_finite_math_only && !flag_trapping_math)
24689 ? gen_tsttfgt_gpr (compare_result, op0, op1)
24690 : gen_cmptfgt_gpr (compare_result, op0, op1);
24691 break;
24693 default:
24694 gcc_unreachable ();
24696 break;
24698 case LT:
24699 case LE:
24700 switch (op_mode)
24702 case E_SFmode:
24703 cmp = (flag_finite_math_only && !flag_trapping_math)
24704 ? gen_tstsflt_gpr (compare_result, op0, op1)
24705 : gen_cmpsflt_gpr (compare_result, op0, op1);
24706 break;
24708 case E_DFmode:
24709 cmp = (flag_finite_math_only && !flag_trapping_math)
24710 ? gen_tstdflt_gpr (compare_result, op0, op1)
24711 : gen_cmpdflt_gpr (compare_result, op0, op1);
24712 break;
24714 case E_TFmode:
24715 case E_IFmode:
24716 case E_KFmode:
24717 cmp = (flag_finite_math_only && !flag_trapping_math)
24718 ? gen_tsttflt_gpr (compare_result, op0, op1)
24719 : gen_cmptflt_gpr (compare_result, op0, op1);
24720 break;
24722 default:
24723 gcc_unreachable ();
24725 break;
24727 default:
24728 gcc_unreachable ();
24731 /* Synthesize LE and GE from LT/GT || EQ. */
24732 if (code == LE || code == GE)
24734 emit_insn (cmp);
24736 compare_result2 = gen_reg_rtx (CCFPmode);
24738 /* Do the EQ. */
24739 switch (op_mode)
24741 case E_SFmode:
24742 cmp = (flag_finite_math_only && !flag_trapping_math)
24743 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
24744 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
24745 break;
24747 case E_DFmode:
24748 cmp = (flag_finite_math_only && !flag_trapping_math)
24749 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
24750 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
24751 break;
24753 case E_TFmode:
24754 case E_IFmode:
24755 case E_KFmode:
24756 cmp = (flag_finite_math_only && !flag_trapping_math)
24757 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
24758 : gen_cmptfeq_gpr (compare_result2, op0, op1);
24759 break;
24761 default:
24762 gcc_unreachable ();
24765 emit_insn (cmp);
24767 /* OR them together. */
24768 or_result = gen_reg_rtx (CCFPmode);
24769 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24770 compare_result2);
24771 compare_result = or_result;
24774 code = reverse_p ? NE : EQ;
24776 emit_insn (cmp);
24779 /* IEEE 128-bit support in VSX registers when we do not have hardware
24780 support. */
24781 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24783 rtx libfunc = NULL_RTX;
24784 bool check_nan = false;
24785 rtx dest;
24787 switch (code)
24789 case EQ:
24790 case NE:
24791 libfunc = optab_libfunc (eq_optab, mode);
24792 break;
24794 case GT:
24795 case GE:
24796 libfunc = optab_libfunc (ge_optab, mode);
24797 break;
24799 case LT:
24800 case LE:
24801 libfunc = optab_libfunc (le_optab, mode);
24802 break;
24804 case UNORDERED:
24805 case ORDERED:
24806 libfunc = optab_libfunc (unord_optab, mode);
24807 code = (code == UNORDERED) ? NE : EQ;
24808 break;
24810 case UNGE:
24811 case UNGT:
24812 check_nan = true;
24813 libfunc = optab_libfunc (ge_optab, mode);
24814 code = (code == UNGE) ? GE : GT;
24815 break;
24817 case UNLE:
24818 case UNLT:
24819 check_nan = true;
24820 libfunc = optab_libfunc (le_optab, mode);
24821 code = (code == UNLE) ? LE : LT;
24822 break;
24824 case UNEQ:
24825 case LTGT:
24826 check_nan = true;
24827 libfunc = optab_libfunc (eq_optab, mode);
24828 code = (code == UNEQ) ? EQ : NE;
24829 break;
24831 default:
24832 gcc_unreachable ();
24835 gcc_assert (libfunc);
24837 if (!check_nan)
24838 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24839 SImode, op0, mode, op1, mode);
24841 /* The library signals an exception for signalling NaNs, so we need to
24842 handle isgreater, etc. by first checking isordered. */
24843 else
24845 rtx ne_rtx, normal_dest, unord_dest;
24846 rtx unord_func = optab_libfunc (unord_optab, mode);
24847 rtx join_label = gen_label_rtx ();
24848 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24849 rtx unord_cmp = gen_reg_rtx (comp_mode);
24852 /* Test for either value being a NaN. */
24853 gcc_assert (unord_func);
24854 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24855 SImode, op0, mode, op1, mode);
24857 /* Set the result to 1 if either value is a NaN, and jump to the
24858 join label.  */
24859 dest = gen_reg_rtx (SImode);
24860 emit_move_insn (dest, const1_rtx);
24861 emit_insn (gen_rtx_SET (unord_cmp,
24862 gen_rtx_COMPARE (comp_mode, unord_dest,
24863 const0_rtx)));
24865 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24866 emit_jump_insn (gen_rtx_SET (pc_rtx,
24867 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24868 join_ref,
24869 pc_rtx)));
24871 /* Do the normal comparison, knowing that the values are not
24872 NaNs. */
24873 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24874 SImode, op0, mode, op1, mode);
24876 emit_insn (gen_cstoresi4 (dest,
24877 gen_rtx_fmt_ee (code, SImode, normal_dest,
24878 const0_rtx),
24879 normal_dest, const0_rtx));
24881 /* Join the NaN and non-NaN paths.  Compare dest against 0.  */
24882 emit_label (join_label);
24883 code = NE;
24886 emit_insn (gen_rtx_SET (compare_result,
24887 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24890 else
24892 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24893 CLOBBERs to match cmptf_internal2 pattern. */
24894 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24895 && FLOAT128_IBM_P (GET_MODE (op0))
24896 && TARGET_HARD_FLOAT && TARGET_FPRS)
24897 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24898 gen_rtvec (10,
24899 gen_rtx_SET (compare_result,
24900 gen_rtx_COMPARE (comp_mode, op0, op1)),
24901 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24902 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24903 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24904 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24905 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24906 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24907 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24908 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24909 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24910 else if (GET_CODE (op1) == UNSPEC
24911 && XINT (op1, 1) == UNSPEC_SP_TEST)
24913 rtx op1b = XVECEXP (op1, 0, 0);
24914 comp_mode = CCEQmode;
24915 compare_result = gen_reg_rtx (CCEQmode);
24916 if (TARGET_64BIT)
24917 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24918 else
24919 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24921 else
24922 emit_insn (gen_rtx_SET (compare_result,
24923 gen_rtx_COMPARE (comp_mode, op0, op1)));
24926 /* Some kinds of FP comparisons need an OR operation;
24927 under flag_finite_math_only we don't bother. */
24928 if (FLOAT_MODE_P (mode)
24929 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24930 && !flag_finite_math_only
24931 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24932 && (code == LE || code == GE
24933 || code == UNEQ || code == LTGT
24934 || code == UNGT || code == UNLT))
24936 enum rtx_code or1, or2;
24937 rtx or1_rtx, or2_rtx, compare2_rtx;
24938 rtx or_result = gen_reg_rtx (CCEQmode);
24940 switch (code)
24942 case LE: or1 = LT; or2 = EQ; break;
24943 case GE: or1 = GT; or2 = EQ; break;
24944 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24945 case LTGT: or1 = LT; or2 = GT; break;
24946 case UNGT: or1 = UNORDERED; or2 = GT; break;
24947 case UNLT: or1 = UNORDERED; or2 = LT; break;
24948 default: gcc_unreachable ();
24950 validate_condition_mode (or1, comp_mode);
24951 validate_condition_mode (or2, comp_mode);
24952 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24953 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24954 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24955 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24956 const_true_rtx);
24957 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24959 compare_result = or_result;
24960 code = EQ;
24963 validate_condition_mode (code, GET_MODE (compare_result));
24965 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
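/* Editorial sketch, not part of this file: the CR-logic synthesis above.
   Conditions the compare instruction cannot test directly are built by
   OR-ing two CR bits (a cror in the generated code): "a <= b" becomes
   LT | EQ, "unordered or greater" becomes UN | GT, and so on.  Restated
   over already-computed predicate bits with a hypothetical helper:  */

enum example_cond { EX_LE, EX_GE, EX_LTGT, EX_UNGT, EX_UNLT, EX_UNEQ };

static int
example_synthesize (enum example_cond c, int lt, int gt, int eq, int un)
{
  switch (c)
    {
    case EX_LE:   return lt | eq;
    case EX_GE:   return gt | eq;
    case EX_LTGT: return lt | gt;
    case EX_UNGT: return un | gt;
    case EX_UNLT: return un | lt;
    case EX_UNEQ: return un | eq;
    }
  return 0;
}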
24969 /* Return the diagnostic message string if the binary operation OP is
24970 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24972 static const char*
24973 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24974 const_tree type1,
24975 const_tree type2)
24977 machine_mode mode1 = TYPE_MODE (type1);
24978 machine_mode mode2 = TYPE_MODE (type2);
24980 /* For complex modes, use the inner type. */
24981 if (COMPLEX_MODE_P (mode1))
24982 mode1 = GET_MODE_INNER (mode1);
24984 if (COMPLEX_MODE_P (mode2))
24985 mode2 = GET_MODE_INNER (mode2);
24987 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24988 double to intermix unless -mfloat128-convert. */
24989 if (mode1 == mode2)
24990 return NULL;
24992 if (!TARGET_FLOAT128_CVT)
24994 if ((mode1 == KFmode && mode2 == IFmode)
24995 || (mode1 == IFmode && mode2 == KFmode))
24996 return N_("__float128 and __ibm128 cannot be used in the same "
24997 "expression");
24999 if (TARGET_IEEEQUAD
25000 && ((mode1 == IFmode && mode2 == TFmode)
25001 || (mode1 == TFmode && mode2 == IFmode)))
25002 return N_("__ibm128 and long double cannot be used in the same "
25003 "expression");
25005 if (!TARGET_IEEEQUAD
25006 && ((mode1 == KFmode && mode2 == TFmode)
25007 || (mode1 == TFmode && mode2 == KFmode)))
25008 return N_("__float128 and long double cannot be used in the same "
25009 "expression");
25012 return NULL;
25016 /* Expand floating point conversion to/from __float128 and __ibm128. */
25018 void
25019 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
25021 machine_mode dest_mode = GET_MODE (dest);
25022 machine_mode src_mode = GET_MODE (src);
25023 convert_optab cvt = unknown_optab;
25024 bool do_move = false;
25025 rtx libfunc = NULL_RTX;
25026 rtx dest2;
25027 typedef rtx (*rtx_2func_t) (rtx, rtx);
25028 rtx_2func_t hw_convert = (rtx_2func_t)0;
25029 size_t kf_or_tf;
25031 struct hw_conv_t {
25032 rtx_2func_t from_df;
25033 rtx_2func_t from_sf;
25034 rtx_2func_t from_si_sign;
25035 rtx_2func_t from_si_uns;
25036 rtx_2func_t from_di_sign;
25037 rtx_2func_t from_di_uns;
25038 rtx_2func_t to_df;
25039 rtx_2func_t to_sf;
25040 rtx_2func_t to_si_sign;
25041 rtx_2func_t to_si_uns;
25042 rtx_2func_t to_di_sign;
25043 rtx_2func_t to_di_uns;
25044 } hw_conversions[2] = {
25045 /* Conversions to/from KFmode.  */
25047 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
25048 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
25049 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
25050 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
25051 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
25052 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
25053 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
25054 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
25055 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
25056 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
25057 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
25058 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
25061 /* Conversions to/from TFmode.  */
25063 gen_extenddftf2_hw, /* TFmode <- DFmode. */
25064 gen_extendsftf2_hw, /* TFmode <- SFmode. */
25065 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
25066 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
25067 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
25068 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
25069 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
25070 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
25071 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
25072 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
25073 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
25074 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
25078 if (dest_mode == src_mode)
25079 gcc_unreachable ();
25081 /* Eliminate memory operations. */
25082 if (MEM_P (src))
25083 src = force_reg (src_mode, src);
25085 if (MEM_P (dest))
25087 rtx tmp = gen_reg_rtx (dest_mode);
25088 rs6000_expand_float128_convert (tmp, src, unsigned_p);
25089 rs6000_emit_move (dest, tmp, dest_mode);
25090 return;
25093 /* Convert to IEEE 128-bit floating point. */
25094 if (FLOAT128_IEEE_P (dest_mode))
25096 if (dest_mode == KFmode)
25097 kf_or_tf = 0;
25098 else if (dest_mode == TFmode)
25099 kf_or_tf = 1;
25100 else
25101 gcc_unreachable ();
25103 switch (src_mode)
25105 case E_DFmode:
25106 cvt = sext_optab;
25107 hw_convert = hw_conversions[kf_or_tf].from_df;
25108 break;
25110 case E_SFmode:
25111 cvt = sext_optab;
25112 hw_convert = hw_conversions[kf_or_tf].from_sf;
25113 break;
25115 case E_KFmode:
25116 case E_IFmode:
25117 case E_TFmode:
25118 if (FLOAT128_IBM_P (src_mode))
25119 cvt = sext_optab;
25120 else
25121 do_move = true;
25122 break;
25124 case E_SImode:
25125 if (unsigned_p)
25127 cvt = ufloat_optab;
25128 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
25130 else
25132 cvt = sfloat_optab;
25133 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
25135 break;
25137 case E_DImode:
25138 if (unsigned_p)
25140 cvt = ufloat_optab;
25141 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
25143 else
25145 cvt = sfloat_optab;
25146 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
25148 break;
25150 default:
25151 gcc_unreachable ();
25155 /* Convert from IEEE 128-bit floating point. */
25156 else if (FLOAT128_IEEE_P (src_mode))
25158 if (src_mode == KFmode)
25159 kf_or_tf = 0;
25160 else if (src_mode == TFmode)
25161 kf_or_tf = 1;
25162 else
25163 gcc_unreachable ();
25165 switch (dest_mode)
25167 case E_DFmode:
25168 cvt = trunc_optab;
25169 hw_convert = hw_conversions[kf_or_tf].to_df;
25170 break;
25172 case E_SFmode:
25173 cvt = trunc_optab;
25174 hw_convert = hw_conversions[kf_or_tf].to_sf;
25175 break;
25177 case E_KFmode:
25178 case E_IFmode:
25179 case E_TFmode:
25180 if (FLOAT128_IBM_P (dest_mode))
25181 cvt = trunc_optab;
25182 else
25183 do_move = true;
25184 break;
25186 case E_SImode:
25187 if (unsigned_p)
25189 cvt = ufix_optab;
25190 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
25192 else
25194 cvt = sfix_optab;
25195 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
25197 break;
25199 case E_DImode:
25200 if (unsigned_p)
25202 cvt = ufix_optab;
25203 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
25205 else
25207 cvt = sfix_optab;
25208 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
25210 break;
25212 default:
25213 gcc_unreachable ();
25217 /* Both IBM format. */
25218 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
25219 do_move = true;
25221 else
25222 gcc_unreachable ();
25224 /* Handle conversion between TFmode/KFmode. */
25225 if (do_move)
25226 emit_move_insn (dest, gen_lowpart (dest_mode, src));
25228 /* Handle conversion if we have hardware support. */
25229 else if (TARGET_FLOAT128_HW && hw_convert)
25230 emit_insn ((hw_convert) (dest, src));
25232 /* Call an external function to do the conversion. */
25233 else if (cvt != unknown_optab)
25235 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
25236 gcc_assert (libfunc != NULL_RTX);
25238 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
25239 src, src_mode);
25241 gcc_assert (dest2 != NULL_RTX);
25242 if (!rtx_equal_p (dest, dest2))
25243 emit_move_insn (dest, dest2);
25246 else
25247 gcc_unreachable ();
25249 return;
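/* Usage sketch (illustrative, not from this file): an expander converting a
   signed DImode value SRC to KFmode could simply do

       rtx dest = gen_reg_rtx (KFmode);
       rs6000_expand_float128_convert (dest, src, false);

   and the routine above picks the hardware insn (gen_float_kfdi2_hw) when
   TARGET_FLOAT128_HW is set, or falls back to the sfloat_optab libcall
   otherwise.  */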
25253 /* Emit the RTL for an sISEL pattern. */
25255 void
25256 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
25258 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
25261 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25262 can be used as that dest register. Return the dest register. */
25264 rtx
25265 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
25267 if (op2 == const0_rtx)
25268 return op1;
25270 if (GET_CODE (scratch) == SCRATCH)
25271 scratch = gen_reg_rtx (mode);
25273 if (logical_operand (op2, mode))
25274 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
25275 else
25276 emit_insn (gen_rtx_SET (scratch,
25277 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
25279 return scratch;
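/* Worked example (illustrative): for OP1 == r3 and OP2 == 0x20, the
   constant satisfies logical_operand, so the routine emits
   (set scratch (xor r3 0x20)); the result is zero exactly when the
   operands are equal.  A constant such as 0x12345 fails logical_operand,
   so the PLUS/negate form is emitted instead.  Either way the caller tests
   the returned register against zero.  */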
25282 void
25283 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
25285 rtx condition_rtx;
25286 machine_mode op_mode;
25287 enum rtx_code cond_code;
25288 rtx result = operands[0];
25290 condition_rtx = rs6000_generate_compare (operands[1], mode);
25291 cond_code = GET_CODE (condition_rtx);
25293 if (FLOAT_MODE_P (mode)
25294 && !TARGET_FPRS && TARGET_HARD_FLOAT)
25296 rtx t;
25298 PUT_MODE (condition_rtx, SImode);
25299 t = XEXP (condition_rtx, 0);
25301 gcc_assert (cond_code == NE || cond_code == EQ);
25303 if (cond_code == NE)
25304 emit_insn (gen_e500_flip_gt_bit (t, t));
25306 emit_insn (gen_move_from_CR_gt_bit (result, t));
25307 return;
25310 if (cond_code == NE
25311 || cond_code == GE || cond_code == LE
25312 || cond_code == GEU || cond_code == LEU
25313 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
25315 rtx not_result = gen_reg_rtx (CCEQmode);
25316 rtx not_op, rev_cond_rtx;
25317 machine_mode cc_mode;
25319 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
25321 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
25322 SImode, XEXP (condition_rtx, 0), const0_rtx);
25323 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
25324 emit_insn (gen_rtx_SET (not_result, not_op));
25325 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
25328 op_mode = GET_MODE (XEXP (operands[1], 0));
25329 if (op_mode == VOIDmode)
25330 op_mode = GET_MODE (XEXP (operands[1], 1));
25332 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
25334 PUT_MODE (condition_rtx, DImode);
25335 convert_move (result, condition_rtx, 0);
25337 else
25339 PUT_MODE (condition_rtx, SImode);
25340 emit_insn (gen_rtx_SET (result, condition_rtx));
25344 /* Emit a conditional branch.  OPERANDS[0] is the comparison and OPERANDS[3] is the destination label.  */
25346 void
25347 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
25349 rtx condition_rtx, loc_ref;
25351 condition_rtx = rs6000_generate_compare (operands[0], mode);
25352 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
25353 emit_jump_insn (gen_rtx_SET (pc_rtx,
25354 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
25355 loc_ref, pc_rtx)));
25358 /* Return the string to output a conditional branch to LABEL, which is
25359 the operand template of the label (LABEL is NULL if the branch is
25360 really a conditional return).
25362 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25363 condition code register and its mode specifies what kind of
25364 comparison we made.
25366 REVERSED is nonzero if we should reverse the sense of the comparison.
25368 INSN is the insn. */
25370 char *
25371 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
25373 static char string[64];
25374 enum rtx_code code = GET_CODE (op);
25375 rtx cc_reg = XEXP (op, 0);
25376 machine_mode mode = GET_MODE (cc_reg);
25377 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
25378 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
25379 int really_reversed = reversed ^ need_longbranch;
25380 char *s = string;
25381 const char *ccode;
25382 const char *pred;
25383 rtx note;
25385 validate_condition_mode (code, mode);
25387 /* Work out which way this really branches. We could use
25388 reverse_condition_maybe_unordered here unconditionally, but
25389 distinguishing the cases makes the resulting assembler clearer. */
25390 if (really_reversed)
25392 /* Reversing an FP compare needs care -- an ordered compare
25393 becomes an unordered compare and vice versa. */
25394 if (mode == CCFPmode)
25395 code = reverse_condition_maybe_unordered (code);
25396 else
25397 code = reverse_condition (code);
25400 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25402 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25403 to the GT bit. */
25404 switch (code)
25406 case EQ:
25407 /* Opposite of GT. */
25408 code = GT;
25409 break;
25411 case NE:
25412 code = UNLE;
25413 break;
25415 default:
25416 gcc_unreachable ();
25420 switch (code)
25422 /* Not all of these are actually distinct opcodes, but
25423 we distinguish them for clarity of the resulting assembler. */
25424 case NE: case LTGT:
25425 ccode = "ne"; break;
25426 case EQ: case UNEQ:
25427 ccode = "eq"; break;
25428 case GE: case GEU:
25429 ccode = "ge"; break;
25430 case GT: case GTU: case UNGT:
25431 ccode = "gt"; break;
25432 case LE: case LEU:
25433 ccode = "le"; break;
25434 case LT: case LTU: case UNLT:
25435 ccode = "lt"; break;
25436 case UNORDERED: ccode = "un"; break;
25437 case ORDERED: ccode = "nu"; break;
25438 case UNGE: ccode = "nl"; break;
25439 case UNLE: ccode = "ng"; break;
25440 default:
25441 gcc_unreachable ();
25444 /* Maybe we have a guess as to how likely the branch is. */
25445 pred = "";
25446 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25447 if (note != NULL_RTX)
25449 /* PROB is the difference from 50%. */
25450 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
25451 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
25453 /* Only hint for highly probable/improbable branches on newer cpus when
25454 we have real profile data, as static prediction overrides processor
25455 dynamic prediction. For older cpus we may as well always hint, but
25456 assume not taken for branches that are very close to 50% as a
25457 mispredicted taken branch is more expensive than a
25458 mispredicted not-taken branch. */
25459 if (rs6000_always_hint
25460 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25461 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25462 && br_prob_note_reliable_p (note)))
25464 if (abs (prob) > REG_BR_PROB_BASE / 20
25465 && ((prob > 0) ^ need_longbranch))
25466 pred = "+";
25467 else
25468 pred = "-";
25472 if (label == NULL)
25473 s += sprintf (s, "b%slr%s ", ccode, pred);
25474 else
25475 s += sprintf (s, "b%s%s ", ccode, pred);
25477 /* We need to escape any '%' characters in the reg_names string.
25478 Assume they'd only be the first character.... */
25479 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25480 *s++ = '%';
25481 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25483 if (label != NULL)
25485 /* If the branch distance was too far, we may have to use an
25486 unconditional branch to go the distance. */
25487 if (need_longbranch)
25488 s += sprintf (s, ",$+8\n\tb %s", label);
25489 else
25490 s += sprintf (s, ",%s", label);
25493 return string;
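/* For illustration, assuming CR field cr1 (printed as "1") and a label
   operand ".L5": a likely-taken EQ branch comes out as "beq+ 1,.L5"; when
   get_attr_length reports 8, the condition is reversed and the long form
   "bne 1,$+8\n\tb .L5" is produced; and a NULL LABEL yields a conditional
   return such as "beqlr 1".  */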
25496 /* Return the string to flip the GT bit on a CR. */
25497 char *
25498 output_e500_flip_gt_bit (rtx dst, rtx src)
25500 static char string[64];
25501 int a, b;
25503 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
25504 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
25506 /* GT bit. */
25507 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25508 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25510 sprintf (string, "crnot %d,%d", a, b);
25511 return string;
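/* Example (illustrative): for DST = SRC = cr2 this computes bit
   4 * 2 + 1 = 9 for both operands and prints "crnot 9,9", complementing
   the GT bit in place, as used by the NE case of rs6000_emit_sCOND
   above.  */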
25514 /* Emit a VSX or Altivec comparison and return the mask register, or NULL_RTX if the comparison has no direct pattern. */
25516 static rtx
25517 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25519 rtx mask;
25520 machine_mode mode = GET_MODE (op0);
25522 switch (code)
25524 default:
25525 break;
25527 case GE:
25528 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25529 return NULL_RTX;
25530 /* FALLTHRU */
25532 case EQ:
25533 case GT:
25534 case GTU:
25535 case ORDERED:
25536 case UNORDERED:
25537 case UNEQ:
25538 case LTGT:
25539 mask = gen_reg_rtx (mode);
25540 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25541 return mask;
25544 return NULL_RTX;
25547 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25548 DMODE is the expected destination mode. This is a recursive function. */
25550 static rtx
25551 rs6000_emit_vector_compare (enum rtx_code rcode,
25552 rtx op0, rtx op1,
25553 machine_mode dmode)
25555 rtx mask;
25556 bool swap_operands = false;
25557 bool try_again = false;
25559 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25560 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25562 /* See if the comparison works as is. */
25563 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25564 if (mask)
25565 return mask;
25567 switch (rcode)
25569 case LT:
25570 rcode = GT;
25571 swap_operands = true;
25572 try_again = true;
25573 break;
25574 case LTU:
25575 rcode = GTU;
25576 swap_operands = true;
25577 try_again = true;
25578 break;
25579 case NE:
25580 case UNLE:
25581 case UNLT:
25582 case UNGE:
25583 case UNGT:
25584 /* Invert condition and try again.
25585 e.g., A != B becomes ~(A==B). */
25587 enum rtx_code rev_code;
25588 enum insn_code nor_code;
25589 rtx mask2;
25591 rev_code = reverse_condition_maybe_unordered (rcode);
25592 if (rev_code == UNKNOWN)
25593 return NULL_RTX;
25595 nor_code = optab_handler (one_cmpl_optab, dmode);
25596 if (nor_code == CODE_FOR_nothing)
25597 return NULL_RTX;
25599 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25600 if (!mask2)
25601 return NULL_RTX;
25603 mask = gen_reg_rtx (dmode);
25604 emit_insn (GEN_FCN (nor_code) (mask, mask2));
25605 return mask;
25607 break;
25608 case GE:
25609 case GEU:
25610 case LE:
25611 case LEU:
25612 /* Try GT/GTU/LT/LTU OR EQ */
25614 rtx c_rtx, eq_rtx;
25615 enum insn_code ior_code;
25616 enum rtx_code new_code;
25618 switch (rcode)
25620 case GE:
25621 new_code = GT;
25622 break;
25624 case GEU:
25625 new_code = GTU;
25626 break;
25628 case LE:
25629 new_code = LT;
25630 break;
25632 case LEU:
25633 new_code = LTU;
25634 break;
25636 default:
25637 gcc_unreachable ();
25640 ior_code = optab_handler (ior_optab, dmode);
25641 if (ior_code == CODE_FOR_nothing)
25642 return NULL_RTX;
25644 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25645 if (!c_rtx)
25646 return NULL_RTX;
25648 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25649 if (!eq_rtx)
25650 return NULL_RTX;
25652 mask = gen_reg_rtx (dmode);
25653 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25654 return mask;
25656 break;
25657 default:
25658 return NULL_RTX;
25661 if (try_again)
25663 if (swap_operands)
25664 std::swap (op0, op1);
25666 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25667 if (mask)
25668 return mask;
25671 /* You only get two chances. */
25672 return NULL_RTX;
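/* Example of the recursion (illustrative): V4SI "a <= b" has no direct
   pattern, so the GE/GEU/LE/LEU arm builds an LT mask (itself obtained by
   swapping the operands of a GT compare) and an EQ mask and IORs them
   together, while V4SI "a != b" builds the EQ mask and complements it via
   one_cmpl_optab.  Each recursive call bottoms out in
   rs6000_emit_vector_compare_inner.  */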
25675 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25676 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25677 operands for the relation operation COND. */
25679 int
25680 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25681 rtx cond, rtx cc_op0, rtx cc_op1)
25683 machine_mode dest_mode = GET_MODE (dest);
25684 machine_mode mask_mode = GET_MODE (cc_op0);
25685 enum rtx_code rcode = GET_CODE (cond);
25686 machine_mode cc_mode = CCmode;
25687 rtx mask;
25688 rtx cond2;
25689 bool invert_move = false;
25691 if (VECTOR_UNIT_NONE_P (dest_mode))
25692 return 0;
25694 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25695 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25697 switch (rcode)
25699 /* Swap operands if we can, and fall back to doing the operation as
25700 specified, and doing a NOR to invert the test. */
25701 case NE:
25702 case UNLE:
25703 case UNLT:
25704 case UNGE:
25705 case UNGT:
25706 /* Invert condition and try again.
25707 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25708 invert_move = true;
25709 rcode = reverse_condition_maybe_unordered (rcode);
25710 if (rcode == UNKNOWN)
25711 return 0;
25712 break;
25714 case GE:
25715 case LE:
25716 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25718 /* Invert condition to avoid compound test. */
25719 invert_move = true;
25720 rcode = reverse_condition (rcode);
25722 break;
25724 case GTU:
25725 case GEU:
25726 case LTU:
25727 case LEU:
25728 /* Mark unsigned tests with CCUNSmode. */
25729 cc_mode = CCUNSmode;
25731 /* Invert condition to avoid compound test if necessary. */
25732 if (rcode == GEU || rcode == LEU)
25734 invert_move = true;
25735 rcode = reverse_condition (rcode);
25737 break;
25739 default:
25740 break;
25743 /* Get the vector mask for the given relational operation. */
25744 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25746 if (!mask)
25747 return 0;
25749 if (invert_move)
25750 std::swap (op_true, op_false);
25752 /* The compare mask is -1/0 per element, so optimize selects whose arms are those constant vectors. */
25753 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25754 && (GET_CODE (op_true) == CONST_VECTOR
25755 || GET_CODE (op_false) == CONST_VECTOR))
25757 rtx constant_0 = CONST0_RTX (dest_mode);
25758 rtx constant_m1 = CONSTM1_RTX (dest_mode);
25760 if (op_true == constant_m1 && op_false == constant_0)
25762 emit_move_insn (dest, mask);
25763 return 1;
25766 else if (op_true == constant_0 && op_false == constant_m1)
25768 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25769 return 1;
25772 /* If we can't use the vector comparison directly, perhaps we can use
25773 the mask for the true or false fields, instead of loading up a
25774 constant. */
25775 if (op_true == constant_m1)
25776 op_true = mask;
25778 if (op_false == constant_0)
25779 op_false = mask;
25782 if (!REG_P (op_true) && !SUBREG_P (op_true))
25783 op_true = force_reg (dest_mode, op_true);
25785 if (!REG_P (op_false) && !SUBREG_P (op_false))
25786 op_false = force_reg (dest_mode, op_false);
25788 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25789 CONST0_RTX (dest_mode));
25790 emit_insn (gen_rtx_SET (dest,
25791 gen_rtx_IF_THEN_ELSE (dest_mode,
25792 cond2,
25793 op_true,
25794 op_false)));
25795 return 1;
25798 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
25799 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands
25800 of the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
25801 hardware has no such operation. */
25803 static int
25804 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25806 enum rtx_code code = GET_CODE (op);
25807 rtx op0 = XEXP (op, 0);
25808 rtx op1 = XEXP (op, 1);
25809 machine_mode compare_mode = GET_MODE (op0);
25810 machine_mode result_mode = GET_MODE (dest);
25811 bool max_p = false;
25813 if (result_mode != compare_mode)
25814 return 0;
25816 if (code == GE || code == GT)
25817 max_p = true;
25818 else if (code == LE || code == LT)
25819 max_p = false;
25820 else
25821 return 0;
25823 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25826 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25827 max_p = !max_p;
25829 else
25830 return 0;
25832 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25833 return 1;
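/* Illustrative mapping: for DFmode "dest = (a >= b) ? a : b" the routine
   emits SMAX, which the machine description implements with XSMAXCDP on
   ISA 3.0; the swapped form "(a >= b) ? b : a" flips MAX_P and becomes
   XSMINCDP instead.  Comparisons other than GE/GT/LE/LT, or operands that
   do not line up with TRUE_COND/FALSE_COND, make it return 0 so the caller
   can try rs6000_emit_p9_fp_cmove.  */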
25836 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25837 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
25838 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
25839 zero/false. Return 0 if the hardware has no such operation. */
25841 static int
25842 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25844 enum rtx_code code = GET_CODE (op);
25845 rtx op0 = XEXP (op, 0);
25846 rtx op1 = XEXP (op, 1);
25847 machine_mode result_mode = GET_MODE (dest);
25848 rtx compare_rtx;
25849 rtx cmove_rtx;
25850 rtx clobber_rtx;
25852 if (!can_create_pseudo_p ())
25853 return 0;
25855 switch (code)
25857 case EQ:
25858 case GE:
25859 case GT:
25860 break;
25862 case NE:
25863 case LT:
25864 case LE:
25865 code = swap_condition (code);
25866 std::swap (op0, op1);
25867 break;
25869 default:
25870 return 0;
25873 /* Generate: [(parallel [(set (dest)
25874 (if_then_else (op (cmp1) (cmp2))
25875 (true)
25876 (false)))
25877 (clobber (scratch))])]. */
25879 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25880 cmove_rtx = gen_rtx_SET (dest,
25881 gen_rtx_IF_THEN_ELSE (result_mode,
25882 compare_rtx,
25883 true_cond,
25884 false_cond));
25886 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25887 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25888 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25890 return 1;
25893 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
25894 operands of the last comparison is nonzero/true, FALSE_COND if it
25895 is zero/false. Return 0 if the hardware has no such operation. */
25897 int
25898 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25900 enum rtx_code code = GET_CODE (op);
25901 rtx op0 = XEXP (op, 0);
25902 rtx op1 = XEXP (op, 1);
25903 machine_mode compare_mode = GET_MODE (op0);
25904 machine_mode result_mode = GET_MODE (dest);
25905 rtx temp;
25906 bool is_against_zero;
25908 /* These modes should always match. */
25909 if (GET_MODE (op1) != compare_mode
25910 /* In the isel case however, we can use a compare immediate, so
25911 op1 may be a small constant. */
25912 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25913 return 0;
25914 if (GET_MODE (true_cond) != result_mode)
25915 return 0;
25916 if (GET_MODE (false_cond) != result_mode)
25917 return 0;
25919 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25920 if (TARGET_P9_MINMAX
25921 && (compare_mode == SFmode || compare_mode == DFmode)
25922 && (result_mode == SFmode || result_mode == DFmode))
25924 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25925 return 1;
25927 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25928 return 1;
25931 /* Don't allow using floating point comparisons for integer results for
25932 now. */
25933 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25934 return 0;
25936 /* First, work out if the hardware can do this at all, or
25937 if it's too slow.... */
25938 if (!FLOAT_MODE_P (compare_mode))
25940 if (TARGET_ISEL)
25941 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25942 return 0;
25944 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25945 && SCALAR_FLOAT_MODE_P (compare_mode))
25946 return 0;
25948 is_against_zero = op1 == CONST0_RTX (compare_mode);
25950 /* A floating-point subtract might overflow, underflow, or produce
25951 an inexact result, thus changing the floating-point flags, so it
25952 can't be generated if we care about that. It's safe if one side
25953 of the construct is zero, since then no subtract will be
25954 generated. */
25955 if (SCALAR_FLOAT_MODE_P (compare_mode)
25956 && flag_trapping_math && ! is_against_zero)
25957 return 0;
25959 /* Eliminate half of the comparisons by switching operands, this
25960 makes the remaining code simpler. */
25961 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25962 || code == LTGT || code == LT || code == UNLE)
25964 code = reverse_condition_maybe_unordered (code);
25965 temp = true_cond;
25966 true_cond = false_cond;
25967 false_cond = temp;
25970 /* UNEQ and LTGT take four instructions for a comparison with zero,
25971 so it'll probably be faster to use a branch here too. */
25972 if (code == UNEQ && HONOR_NANS (compare_mode))
25973 return 0;
25975 /* We're going to try to implement comparisons by performing
25976 a subtract, then comparing against zero. Unfortunately,
25977 Inf - Inf is NaN which is not zero, and so if we don't
25978 know that the operand is finite and the comparison
25979 would treat EQ differently from UNORDERED, we can't do it. */
25980 if (HONOR_INFINITIES (compare_mode)
25981 && code != GT && code != UNGE
25982 && (GET_CODE (op1) != CONST_DOUBLE
25983 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25984 /* Constructs of the form (a OP b ? a : b) are safe. */
25985 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25986 || (! rtx_equal_p (op0, true_cond)
25987 && ! rtx_equal_p (op1, true_cond))))
25988 return 0;
25990 /* At this point we know we can use fsel. */
25992 /* Reduce the comparison to a comparison against zero. */
25993 if (! is_against_zero)
25995 temp = gen_reg_rtx (compare_mode);
25996 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25997 op0 = temp;
25998 op1 = CONST0_RTX (compare_mode);
26001 /* If we don't care about NaNs we can reduce some of the comparisons
26002 down to faster ones. */
26003 if (! HONOR_NANS (compare_mode))
26004 switch (code)
26006 case GT:
26007 code = LE;
26008 temp = true_cond;
26009 true_cond = false_cond;
26010 false_cond = temp;
26011 break;
26012 case UNGE:
26013 code = GE;
26014 break;
26015 case UNEQ:
26016 code = EQ;
26017 break;
26018 default:
26019 break;
26022 /* Now, reduce everything down to a GE. */
26023 switch (code)
26025 case GE:
26026 break;
26028 case LE:
26029 temp = gen_reg_rtx (compare_mode);
26030 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26031 op0 = temp;
26032 break;
26034 case ORDERED:
26035 temp = gen_reg_rtx (compare_mode);
26036 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
26037 op0 = temp;
26038 break;
26040 case EQ:
26041 temp = gen_reg_rtx (compare_mode);
26042 emit_insn (gen_rtx_SET (temp,
26043 gen_rtx_NEG (compare_mode,
26044 gen_rtx_ABS (compare_mode, op0))));
26045 op0 = temp;
26046 break;
26048 case UNGE:
26049 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
26050 temp = gen_reg_rtx (result_mode);
26051 emit_insn (gen_rtx_SET (temp,
26052 gen_rtx_IF_THEN_ELSE (result_mode,
26053 gen_rtx_GE (VOIDmode,
26054 op0, op1),
26055 true_cond, false_cond)));
26056 false_cond = true_cond;
26057 true_cond = temp;
26059 temp = gen_reg_rtx (compare_mode);
26060 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26061 op0 = temp;
26062 break;
26064 case GT:
26065 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
26066 temp = gen_reg_rtx (result_mode);
26067 emit_insn (gen_rtx_SET (temp,
26068 gen_rtx_IF_THEN_ELSE (result_mode,
26069 gen_rtx_GE (VOIDmode,
26070 op0, op1),
26071 true_cond, false_cond)));
26072 true_cond = false_cond;
26073 false_cond = temp;
26075 temp = gen_reg_rtx (compare_mode);
26076 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26077 op0 = temp;
26078 break;
26080 default:
26081 gcc_unreachable ();
26084 emit_insn (gen_rtx_SET (dest,
26085 gen_rtx_IF_THEN_ELSE (result_mode,
26086 gen_rtx_GE (VOIDmode,
26087 op0, op1),
26088 true_cond, false_cond)));
26089 return 1;
26092 /* Same as above, but for ints (isel). */
26094 static int
26095 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
26097 rtx condition_rtx, cr;
26098 machine_mode mode = GET_MODE (dest);
26099 enum rtx_code cond_code;
26100 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
26101 bool signedp;
26103 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
26104 return 0;
26106 /* We still have to do the compare, because isel doesn't do a
26107 compare; it just looks at the CRx bits set by a previous compare
26108 instruction. */
26109 condition_rtx = rs6000_generate_compare (op, mode);
26110 cond_code = GET_CODE (condition_rtx);
26111 cr = XEXP (condition_rtx, 0);
26112 signedp = GET_MODE (cr) == CCmode;
26114 isel_func = (mode == SImode
26115 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
26116 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
26118 switch (cond_code)
26120 case LT: case GT: case LTU: case GTU: case EQ:
26121 /* isel handles these directly. */
26122 break;
26124 default:
26125 /* We need to swap the sense of the comparison. */
26127 std::swap (false_cond, true_cond);
26128 PUT_CODE (condition_rtx, reverse_condition (cond_code));
26130 break;
26133 false_cond = force_reg (mode, false_cond);
26134 if (true_cond != const0_rtx)
26135 true_cond = force_reg (mode, true_cond);
26137 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
26139 return 1;
26142 const char *
26143 output_isel (rtx *operands)
26145 enum rtx_code code;
26147 code = GET_CODE (operands[1]);
26149 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
26151 gcc_assert (GET_CODE (operands[2]) == REG
26152 && GET_CODE (operands[3]) == REG);
26153 PUT_CODE (operands[1], reverse_condition (code));
26154 return "isel %0,%3,%2,%j1";
26157 return "isel %0,%2,%3,%j1";
26160 void
26161 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
26163 machine_mode mode = GET_MODE (op0);
26164 enum rtx_code c;
26165 rtx target;
26167 /* VSX/altivec have direct min/max insns. */
26168 if ((code == SMAX || code == SMIN)
26169 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
26170 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
26172 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
26173 return;
26176 if (code == SMAX || code == SMIN)
26177 c = GE;
26178 else
26179 c = GEU;
26181 if (code == SMAX || code == UMAX)
26182 target = emit_conditional_move (dest, c, op0, op1, mode,
26183 op0, op1, mode, 0);
26184 else
26185 target = emit_conditional_move (dest, c, op0, op1, mode,
26186 op1, op0, mode, 0);
26187 gcc_assert (target);
26188 if (target != dest)
26189 emit_move_insn (dest, target);
26192 /* Split a signbit operation on 64-bit machines with direct move. Also allow
26193 the value to come from memory or to be already loaded into a GPR. */
26195 void
26196 rs6000_split_signbit (rtx dest, rtx src)
26198 machine_mode d_mode = GET_MODE (dest);
26199 machine_mode s_mode = GET_MODE (src);
26200 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
26201 rtx shift_reg = dest_di;
26203 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
26205 if (MEM_P (src))
26207 rtx mem = (WORDS_BIG_ENDIAN
26208 ? adjust_address (src, DImode, 0)
26209 : adjust_address (src, DImode, 8));
26210 emit_insn (gen_rtx_SET (dest_di, mem));
26213 else
26215 unsigned int r = reg_or_subregno (src);
26217 if (INT_REGNO_P (r))
26218 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
26220 else
26222 /* Generate the special mfvsrd instruction to get it in a GPR. */
26223 gcc_assert (VSX_REGNO_P (r));
26224 if (s_mode == KFmode)
26225 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
26226 else
26227 emit_insn (gen_signbittf2_dm2 (dest_di, src));
26231 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
26232 return;
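/* Sketch of the output (illustrative), for a KFmode value already in a VSX
   register on a little-endian system:

       mfvsrd  rD,vS        # signbitkf2_dm2: move high doubleword to a GPR
       srdi    rD,rD,63     # lshrdi3: isolate the sign bit

   A GPR source instead just shifts the doubleword that holds the sign
   (register R + 1 on little-endian), and a MEM source loads that
   doubleword directly.  */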
26235 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26236 COND is true. Mark the jump as unlikely to be taken. */
26238 static void
26239 emit_unlikely_jump (rtx cond, rtx label)
26241 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
26242 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
26243 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
26246 /* A subroutine of the atomic operation splitters. Emit a load-locked
26247 instruction in MODE. For QI/HImode, possibly use a pattern that includes
26248 the zero_extend operation. */
26250 static void
26251 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
26253 rtx (*fn) (rtx, rtx) = NULL;
26255 switch (mode)
26257 case E_QImode:
26258 fn = gen_load_lockedqi;
26259 break;
26260 case E_HImode:
26261 fn = gen_load_lockedhi;
26262 break;
26263 case E_SImode:
26264 if (GET_MODE (mem) == QImode)
26265 fn = gen_load_lockedqi_si;
26266 else if (GET_MODE (mem) == HImode)
26267 fn = gen_load_lockedhi_si;
26268 else
26269 fn = gen_load_lockedsi;
26270 break;
26271 case E_DImode:
26272 fn = gen_load_lockeddi;
26273 break;
26274 case E_TImode:
26275 fn = gen_load_lockedti;
26276 break;
26277 default:
26278 gcc_unreachable ();
26280 emit_insn (fn (reg, mem));
26283 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26284 instruction in MODE. */
26286 static void
26287 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
26289 rtx (*fn) (rtx, rtx, rtx) = NULL;
26291 switch (mode)
26293 case E_QImode:
26294 fn = gen_store_conditionalqi;
26295 break;
26296 case E_HImode:
26297 fn = gen_store_conditionalhi;
26298 break;
26299 case E_SImode:
26300 fn = gen_store_conditionalsi;
26301 break;
26302 case E_DImode:
26303 fn = gen_store_conditionaldi;
26304 break;
26305 case E_TImode:
26306 fn = gen_store_conditionalti;
26307 break;
26308 default:
26309 gcc_unreachable ();
26312 /* Emit sync before stwcx. to address PPC405 Erratum. */
26313 if (PPC405_ERRATUM77)
26314 emit_insn (gen_hwsync ());
26316 emit_insn (fn (res, mem, val));
26319 /* Expand barriers before and after a load_locked/store_cond sequence. */
26321 static rtx
26322 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
26324 rtx addr = XEXP (mem, 0);
26325 int strict_p = (reload_in_progress || reload_completed);
26327 if (!legitimate_indirect_address_p (addr, strict_p)
26328 && !legitimate_indexed_address_p (addr, strict_p))
26330 addr = force_reg (Pmode, addr);
26331 mem = replace_equiv_address_nv (mem, addr);
26334 switch (model)
26336 case MEMMODEL_RELAXED:
26337 case MEMMODEL_CONSUME:
26338 case MEMMODEL_ACQUIRE:
26339 break;
26340 case MEMMODEL_RELEASE:
26341 case MEMMODEL_ACQ_REL:
26342 emit_insn (gen_lwsync ());
26343 break;
26344 case MEMMODEL_SEQ_CST:
26345 emit_insn (gen_hwsync ());
26346 break;
26347 default:
26348 gcc_unreachable ();
26350 return mem;
26353 static void
26354 rs6000_post_atomic_barrier (enum memmodel model)
26356 switch (model)
26358 case MEMMODEL_RELAXED:
26359 case MEMMODEL_CONSUME:
26360 case MEMMODEL_RELEASE:
26361 break;
26362 case MEMMODEL_ACQUIRE:
26363 case MEMMODEL_ACQ_REL:
26364 case MEMMODEL_SEQ_CST:
26365 emit_insn (gen_isync ());
26366 break;
26367 default:
26368 gcc_unreachable ();
26372 /* A subroutine of the various atomic expanders. For sub-word operations,
26373 we must adjust things to operate on SImode. Given the original MEM,
26374 return a new aligned memory. Also build and return the quantities by
26375 which to shift and mask. */
26377 static rtx
26378 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
26380 rtx addr, align, shift, mask, mem;
26381 HOST_WIDE_INT shift_mask;
26382 machine_mode mode = GET_MODE (orig_mem);
26384 /* For smaller modes, we have to implement this via SImode. */
26385 shift_mask = (mode == QImode ? 0x18 : 0x10);
26387 addr = XEXP (orig_mem, 0);
26388 addr = force_reg (GET_MODE (addr), addr);
26390 /* Aligned memory containing subword. Generate a new memory. We
26391 do not want any of the existing MEM_ATTR data, as we're now
26392 accessing memory outside the original object. */
26393 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
26394 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26395 mem = gen_rtx_MEM (SImode, align);
26396 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26397 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26398 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26400 /* Shift amount for subword relative to aligned word. */
26401 shift = gen_reg_rtx (SImode);
26402 addr = gen_lowpart (SImode, addr);
26403 rtx tmp = gen_reg_rtx (SImode);
26404 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26405 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26406 if (BYTES_BIG_ENDIAN)
26407 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26408 shift, 1, OPTAB_LIB_WIDEN);
26409 *pshift = shift;
26411 /* Mask for insertion. */
26412 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26413 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26414 *pmask = mask;
26416 return mem;
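/* Worked example (illustrative): for a QImode access at address 0x1003,
   ALIGN masks the address down to 0x1000 and the shift is
   (0x1003 << 3) & 0x18 = 24 on little-endian; big-endian then XORs with
   0x18, giving 0.  The returned mask is GET_MODE_MASK (QImode) << shift,
   i.e. 0xFF000000 here on little-endian, selecting exactly that byte for
   the insertion done later by rs6000_mask_atomic_subword.  */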
26419 /* A subroutine of the various atomic expanders. For sub-word operands,
26420 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
26422 static rtx
26423 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26425 rtx x;
26427 x = gen_reg_rtx (SImode);
26428 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26429 gen_rtx_NOT (SImode, mask),
26430 oldval)));
26432 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
26434 return x;
26437 /* A subroutine of the various atomic expanders. For sub-word operands,
26438 extract WIDE to NARROW via SHIFT. */
26440 static void
26441 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26443 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26444 wide, 1, OPTAB_LIB_WIDEN);
26445 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26448 /* Expand an atomic compare and swap operation. */
26450 void
26451 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26453 rtx boolval, retval, mem, oldval, newval, cond;
26454 rtx label1, label2, x, mask, shift;
26455 machine_mode mode, orig_mode;
26456 enum memmodel mod_s, mod_f;
26457 bool is_weak;
26459 boolval = operands[0];
26460 retval = operands[1];
26461 mem = operands[2];
26462 oldval = operands[3];
26463 newval = operands[4];
26464 is_weak = (INTVAL (operands[5]) != 0);
26465 mod_s = memmodel_base (INTVAL (operands[6]));
26466 mod_f = memmodel_base (INTVAL (operands[7]));
26467 orig_mode = mode = GET_MODE (mem);
26469 mask = shift = NULL_RTX;
26470 if (mode == QImode || mode == HImode)
26472 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26473 lwarx and shift/mask operations. With power8, we need to do the
26474 comparison in SImode, but the store is still done in QI/HImode. */
26475 oldval = convert_modes (SImode, mode, oldval, 1);
26477 if (!TARGET_SYNC_HI_QI)
26479 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26481 /* Shift and mask OLDVAL into position within the word. */
26482 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26483 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26485 /* Shift and mask NEWVAL into position within the word. */
26486 newval = convert_modes (SImode, mode, newval, 1);
26487 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26488 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26491 /* Prepare to adjust the return value. */
26492 retval = gen_reg_rtx (SImode);
26493 mode = SImode;
26495 else if (reg_overlap_mentioned_p (retval, oldval))
26496 oldval = copy_to_reg (oldval);
26498 if (mode != TImode && !reg_or_short_operand (oldval, mode))
26499 oldval = copy_to_mode_reg (mode, oldval);
26501 if (reg_overlap_mentioned_p (retval, newval))
26502 newval = copy_to_reg (newval);
26504 mem = rs6000_pre_atomic_barrier (mem, mod_s);
26506 label1 = NULL_RTX;
26507 if (!is_weak)
26509 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26510 emit_label (XEXP (label1, 0));
26512 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26514 emit_load_locked (mode, retval, mem);
26516 x = retval;
26517 if (mask)
26518 x = expand_simple_binop (SImode, AND, retval, mask,
26519 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26521 cond = gen_reg_rtx (CCmode);
26522 /* If we have TImode, synthesize a comparison. */
26523 if (mode != TImode)
26524 x = gen_rtx_COMPARE (CCmode, x, oldval);
26525 else
26527 rtx xor1_result = gen_reg_rtx (DImode);
26528 rtx xor2_result = gen_reg_rtx (DImode);
26529 rtx or_result = gen_reg_rtx (DImode);
26530 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26531 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26532 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26533 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26535 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26536 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26537 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26538 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26541 emit_insn (gen_rtx_SET (cond, x));
26543 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26544 emit_unlikely_jump (x, label2);
26546 x = newval;
26547 if (mask)
26548 x = rs6000_mask_atomic_subword (retval, newval, mask);
26550 emit_store_conditional (orig_mode, cond, mem, x);
26552 if (!is_weak)
26554 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26555 emit_unlikely_jump (x, label1);
26558 if (!is_mm_relaxed (mod_f))
26559 emit_label (XEXP (label2, 0));
26561 rs6000_post_atomic_barrier (mod_s);
26563 if (is_mm_relaxed (mod_f))
26564 emit_label (XEXP (label2, 0));
26566 if (shift)
26567 rs6000_finish_atomic_subword (operands[1], retval, shift);
26568 else if (mode != GET_MODE (operands[1]))
26569 convert_move (operands[1], retval, 1);
26571 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26572 x = gen_rtx_EQ (SImode, cond, const0_rtx);
26573 emit_insn (gen_rtx_SET (boolval, x));
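/* Shape of the emitted code (illustrative), for a strong SImode compare-
   and-swap with seq_cst ordering; the numeric labels are hypothetical:

       hwsync                    # rs6000_pre_atomic_barrier
   1:  lwarx   rRET,0,rMEM       # emit_load_locked
       cmpw    cr0,rRET,rOLD
       bne-    cr0,2f            # unlikely: values differ, fail
       stwcx.  rNEW,0,rMEM       # emit_store_conditional
       bne-    cr0,1b            # unlikely: reservation lost, retry
   2:  isync                     # rs6000_post_atomic_barrier

   CR0 then holds EQ on success and NE on failure, as noted above.  */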
26576 /* Expand an atomic exchange operation. */
26578 void
26579 rs6000_expand_atomic_exchange (rtx operands[])
26581 rtx retval, mem, val, cond;
26582 machine_mode mode;
26583 enum memmodel model;
26584 rtx label, x, mask, shift;
26586 retval = operands[0];
26587 mem = operands[1];
26588 val = operands[2];
26589 model = memmodel_base (INTVAL (operands[3]));
26590 mode = GET_MODE (mem);
26592 mask = shift = NULL_RTX;
26593 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26595 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26597 /* Shift and mask VAL into position within the word. */
26598 val = convert_modes (SImode, mode, val, 1);
26599 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26600 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26602 /* Prepare to adjust the return value. */
26603 retval = gen_reg_rtx (SImode);
26604 mode = SImode;
26607 mem = rs6000_pre_atomic_barrier (mem, model);
26609 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26610 emit_label (XEXP (label, 0));
26612 emit_load_locked (mode, retval, mem);
26614 x = val;
26615 if (mask)
26616 x = rs6000_mask_atomic_subword (retval, val, mask);
26618 cond = gen_reg_rtx (CCmode);
26619 emit_store_conditional (mode, cond, mem, x);
26621 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26622 emit_unlikely_jump (x, label);
26624 rs6000_post_atomic_barrier (model);
26626 if (shift)
26627 rs6000_finish_atomic_subword (operands[0], retval, shift);
26630 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26631 to perform. MEM is the memory on which to operate. VAL is the second
26632 operand of the binary operator. BEFORE and AFTER are optional locations to
26633 return the value of MEM either before or after the operation. MODEL_RTX
26634 is a CONST_INT containing the memory model to use. */
26636 void
26637 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26638 rtx orig_before, rtx orig_after, rtx model_rtx)
26640 enum memmodel model = memmodel_base (INTVAL (model_rtx));
26641 machine_mode mode = GET_MODE (mem);
26642 machine_mode store_mode = mode;
26643 rtx label, x, cond, mask, shift;
26644 rtx before = orig_before, after = orig_after;
26646 mask = shift = NULL_RTX;
26647 /* On power8, we want to use SImode for the operation. On previous systems,
26648 do the operation on the SImode word containing the subword and shift/mask
26649 to get the proper byte or halfword. */
26650 if (mode == QImode || mode == HImode)
26652 if (TARGET_SYNC_HI_QI)
26654 val = convert_modes (SImode, mode, val, 1);
26656 /* Prepare to adjust the return value. */
26657 before = gen_reg_rtx (SImode);
26658 if (after)
26659 after = gen_reg_rtx (SImode);
26660 mode = SImode;
26662 else
26664 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26666 /* Shift and mask VAL into position within the word. */
26667 val = convert_modes (SImode, mode, val, 1);
26668 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26671 switch (code)
26673 case IOR:
26674 case XOR:
26675 /* We've already zero-extended VAL. That is sufficient to
26676 make certain that it does not affect other bits. */
26677 mask = NULL;
26678 break;
26680 case AND:
26681 /* If we make certain that all of the other bits in VAL are
26682 set, that will be sufficient to not affect other bits. */
26683 x = gen_rtx_NOT (SImode, mask);
26684 x = gen_rtx_IOR (SImode, x, val);
26685 emit_insn (gen_rtx_SET (val, x));
26686 mask = NULL;
26687 break;
26689 case NOT:
26690 case PLUS:
26691 case MINUS:
26692 /* These will all affect bits outside the field and need
26693 adjustment via MASK within the loop. */
26694 break;
26696 default:
26697 gcc_unreachable ();
26700 /* Prepare to adjust the return value. */
26701 before = gen_reg_rtx (SImode);
26702 if (after)
26703 after = gen_reg_rtx (SImode);
26704 store_mode = mode = SImode;
26708 mem = rs6000_pre_atomic_barrier (mem, model);
26710 label = gen_label_rtx ();
26711 emit_label (label);
26712 label = gen_rtx_LABEL_REF (VOIDmode, label);
26714 if (before == NULL_RTX)
26715 before = gen_reg_rtx (mode);
26717 emit_load_locked (mode, before, mem);
26719 if (code == NOT)
26721 x = expand_simple_binop (mode, AND, before, val,
26722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26723 after = expand_simple_unop (mode, NOT, x, after, 1);
26725 else
26727 after = expand_simple_binop (mode, code, before, val,
26728 after, 1, OPTAB_LIB_WIDEN);
26731 x = after;
26732 if (mask)
26734 x = expand_simple_binop (SImode, AND, after, mask,
26735 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26736 x = rs6000_mask_atomic_subword (before, x, mask);
26738 else if (store_mode != mode)
26739 x = convert_modes (store_mode, mode, x, 1);
26741 cond = gen_reg_rtx (CCmode);
26742 emit_store_conditional (store_mode, cond, mem, x);
26744 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26745 emit_unlikely_jump (x, label);
26747 rs6000_post_atomic_barrier (model);
26749 if (shift)
26751 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26752 then do the calculations in a SImode register. */
26753 if (orig_before)
26754 rs6000_finish_atomic_subword (orig_before, before, shift);
26755 if (orig_after)
26756 rs6000_finish_atomic_subword (orig_after, after, shift);
26758 else if (store_mode != mode)
26760 /* QImode/HImode on machines with lbarx/lharx where we do the native
26761 operation and then do the calculations in a SImode register. */
26762 if (orig_before)
26763 convert_move (orig_before, before, 1);
26764 if (orig_after)
26765 convert_move (orig_after, after, 1);
26767 else if (orig_after && after != orig_after)
26768 emit_move_insn (orig_after, after);
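/* Illustrative instance: an SImode atomic fetch-and-add becomes the usual
   reservation loop

   1:  lwarx   rB,0,rMEM
       add     rA,rB,rVAL
       stwcx.  rA,0,rMEM
       bne-    cr0,1b

   with BEFORE = rB and AFTER = rA.  The subword path runs the same loop on
   the containing SImode word: IOR/XOR need no mask step because VAL was
   zero-extended, and AND pre-sets the bits outside the field so that they
   pass through unchanged.  */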
26771 /* Emit instructions to move SRC to DST. Called by splitters for
26772 multi-register moves. It will emit at most one instruction for
26773 each register that is accessed; that is, it won't emit li/lis pairs
26774 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26775 register. */
26777 void
26778 rs6000_split_multireg_move (rtx dst, rtx src)
26780 /* The register number of the first register being moved. */
26781 int reg;
26782 /* The mode that is to be moved. */
26783 machine_mode mode;
26784 /* The mode that the move is being done in, and its size. */
26785 machine_mode reg_mode;
26786 int reg_mode_size;
26787 /* The number of registers that will be moved. */
26788 int nregs;
26790 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26791 mode = GET_MODE (dst);
26792 nregs = hard_regno_nregs (reg, mode);
26793 if (FP_REGNO_P (reg))
26794 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26795 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26796 else if (ALTIVEC_REGNO_P (reg))
26797 reg_mode = V16QImode;
26798 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26799 reg_mode = DFmode;
26800 else
26801 reg_mode = word_mode;
26802 reg_mode_size = GET_MODE_SIZE (reg_mode);
26804 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26806 /* TDmode residing in FP registers is special, since the ISA requires that
26807 the lower-numbered word of a register pair is always the most significant
26808 word, even in little-endian mode. This does not match the usual subreg
26809 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26810 the appropriate constituent registers "by hand" in little-endian mode.
26812 Note we do not need to check for destructive overlap here since TDmode
26813 can only reside in even/odd register pairs. */
26814 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26816 rtx p_src, p_dst;
26817 int i;
26819 for (i = 0; i < nregs; i++)
26821 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26822 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26823 else
26824 p_src = simplify_gen_subreg (reg_mode, src, mode,
26825 i * reg_mode_size);
26827 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26828 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26829 else
26830 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26831 i * reg_mode_size);
26833 emit_insn (gen_rtx_SET (p_dst, p_src));
26836 return;
26839 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26841 /* Move register range backwards, if we might have destructive
26842 overlap. */
26843 int i;
26844 for (i = nregs - 1; i >= 0; i--)
26845 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26846 i * reg_mode_size),
26847 simplify_gen_subreg (reg_mode, src, mode,
26848 i * reg_mode_size)));
26850 else
26852 int i;
26853 int j = -1;
26854 bool used_update = false;
26855 rtx restore_basereg = NULL_RTX;
26857 if (MEM_P (src) && INT_REGNO_P (reg))
26859 rtx breg;
26861 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26862 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26864 rtx delta_rtx;
26865 breg = XEXP (XEXP (src, 0), 0);
26866 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26867 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26868 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26869 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26870 src = replace_equiv_address (src, breg);
26872 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26874 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26876 rtx basereg = XEXP (XEXP (src, 0), 0);
26877 if (TARGET_UPDATE)
26879 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26880 emit_insn (gen_rtx_SET (ndst,
26881 gen_rtx_MEM (reg_mode,
26882 XEXP (src, 0))));
26883 used_update = true;
26885 else
26886 emit_insn (gen_rtx_SET (basereg,
26887 XEXP (XEXP (src, 0), 1)));
26888 src = replace_equiv_address (src, basereg);
26890 else
26892 rtx basereg = gen_rtx_REG (Pmode, reg);
26893 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26894 src = replace_equiv_address (src, basereg);
26898 breg = XEXP (src, 0);
26899 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26900 breg = XEXP (breg, 0);
26902 /* If the base register we are using to address memory is
26903 also a destination reg, then change that register last. */
26904 if (REG_P (breg)
26905 && REGNO (breg) >= REGNO (dst)
26906 && REGNO (breg) < REGNO (dst) + nregs)
26907 j = REGNO (breg) - REGNO (dst);
26909 else if (MEM_P (dst) && INT_REGNO_P (reg))
26911 rtx breg;
26913 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26914 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26916 rtx delta_rtx;
26917 breg = XEXP (XEXP (dst, 0), 0);
26918 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26919 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26920 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26922 /* We have to update the breg before doing the store.
26923 Use store with update, if available. */
26925 if (TARGET_UPDATE)
26927 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26928 emit_insn (TARGET_32BIT
26929 ? (TARGET_POWERPC64
26930 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26931 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26932 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26933 used_update = true;
26935 else
26936 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26937 dst = replace_equiv_address (dst, breg);
26939 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26940 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26942 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26944 rtx basereg = XEXP (XEXP (dst, 0), 0);
26945 if (TARGET_UPDATE)
26947 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26948 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26949 XEXP (dst, 0)),
26950 nsrc));
26951 used_update = true;
26953 else
26954 emit_insn (gen_rtx_SET (basereg,
26955 XEXP (XEXP (dst, 0), 1)));
26956 dst = replace_equiv_address (dst, basereg);
26958 else
26960 rtx basereg = XEXP (XEXP (dst, 0), 0);
26961 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26962 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26963 && REG_P (basereg)
26964 && REG_P (offsetreg)
26965 && REGNO (basereg) != REGNO (offsetreg));
26966 if (REGNO (basereg) == 0)
26968 rtx tmp = offsetreg;
26969 offsetreg = basereg;
26970 basereg = tmp;
26972 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26973 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26974 dst = replace_equiv_address (dst, basereg);
26977 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26978 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26981 for (i = 0; i < nregs; i++)
26983 /* Calculate index to next subword. */
26984 ++j;
26985 if (j == nregs)
26986 j = 0;
26988 /* If we already emitted the move of the first word by
26989 a store with update, there is no need to do anything. */
26990 if (j == 0 && used_update)
26991 continue;
26993 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26994 j * reg_mode_size),
26995 simplify_gen_subreg (reg_mode, src, mode,
26996 j * reg_mode_size)));
26998 if (restore_basereg != NULL_RTX)
26999 emit_insn (restore_basereg);
27004 /* This page contains routines that are used to determine what the
27005 function prologue and epilogue code will do and write them out. */
27007 static inline bool
27008 save_reg_p (int r)
27010 return !call_used_regs[r] && df_regs_ever_live_p (r);
27013 /* Determine whether the gp REG is really used. */
27015 static bool
27016 rs6000_reg_live_or_pic_offset_p (int reg)
27018 /* We need to mark the PIC offset register live under the same conditions
27019 as it is set up, or else it won't be saved before we clobber it. */
27021 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
27023 if (TARGET_TOC && TARGET_MINIMAL_TOC
27024 && (crtl->calls_eh_return
27025 || df_regs_ever_live_p (reg)
27026 || !constant_pool_empty_p ()))
27027 return true;
27029 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
27030 && flag_pic)
27031 return true;
27034 /* If the function calls eh_return, claim as used all the registers that
27035 would otherwise be checked for liveness. */
27037 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
27038 && !call_used_regs[reg]);
27041 /* Return the first fixed-point register that is required to be
27042 saved. 32 if none. */
27044 int
27045 first_reg_to_save (void)
27047 int first_reg;
27049 /* Find lowest numbered live register. */
27050 for (first_reg = 13; first_reg <= 31; first_reg++)
27051 if (save_reg_p (first_reg))
27052 break;
27054 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
27055 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
27056 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27057 || (TARGET_TOC && TARGET_MINIMAL_TOC))
27058 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
27059 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
27061 #if TARGET_MACHO
27062 if (flag_pic
27063 && crtl->uses_pic_offset_table
27064 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
27065 return RS6000_PIC_OFFSET_TABLE_REGNUM;
27066 #endif
27068 return first_reg;
27071 /* Similar, for FP regs. */
27073 int
27074 first_fp_reg_to_save (void)
27076 int first_reg;
27078 /* Find lowest numbered live register. */
27079 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
27080 if (save_reg_p (first_reg))
27081 break;
27083 return first_reg;
27086 /* Similar, for AltiVec regs. */
27088 static int
27089 first_altivec_reg_to_save (void)
27091 int i;
27093 /* Stack frame remains as is unless we are in the AltiVec ABI. */
27094 if (! TARGET_ALTIVEC_ABI)
27095 return LAST_ALTIVEC_REGNO + 1;
27097 /* On Darwin, the unwind routines are compiled without
27098 TARGET_ALTIVEC, and use save_world to save/restore the
27099 altivec registers when necessary. */
27100 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27101 && ! TARGET_ALTIVEC)
27102 return FIRST_ALTIVEC_REGNO + 20;
27104 /* Find lowest numbered live register. */
27105 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
27106 if (save_reg_p (i))
27107 break;
27109 return i;
27112 /* Return a 32-bit mask of the AltiVec registers we need to set in
27113 VRSAVE. Bit n of the return value is 1 if Vn is live, where bits
27114 are numbered with the MSB of the 32-bit word as bit 0. */
27116 static unsigned int
27117 compute_vrsave_mask (void)
27119 unsigned int i, mask = 0;
27121 /* On Darwin, the unwind routines are compiled without
27122 TARGET_ALTIVEC, and use save_world to save/restore the
27123 call-saved altivec registers when necessary. */
27124 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27125 && ! TARGET_ALTIVEC)
27126 mask |= 0xFFF;
27128 /* First, find out if we use _any_ altivec registers. */
27129 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27130 if (df_regs_ever_live_p (i))
27131 mask |= ALTIVEC_REG_BIT (i);
27133 if (mask == 0)
27134 return mask;
27136 /* Next, remove the argument registers from the set. These must
27137 be in the VRSAVE mask set by the caller, so we don't need to add
27138 them in again. More importantly, the mask we compute here is
27139 used to generate CLOBBERs in the set_vrsave insn, and we do not
27140 wish the argument registers to die. */
27141 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
27142 mask &= ~ALTIVEC_REG_BIT (i);
27144 /* Similarly, remove the return value from the set. */
27146 bool yes = false;
27147 diddle_return_value (is_altivec_return_reg, &yes);
27148 if (yes)
27149 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
27152 return mask;
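/* Example (illustrative): if only V20 and V21 are ever live and neither is
   an argument or return-value register, the mask is ALTIVEC_REG_BIT for
   V20 IORed with that for V21, i.e. 0x00000c00: bits 20 and 21 counting
   the MSB of the 32-bit word as bit 0, per the VRSAVE convention above.  */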
27155 /* For a very restricted set of circumstances, we can cut down the
27156 size of prologues/epilogues by calling our own save/restore-the-world
27157 routines. */
27159 static void
27160 compute_save_world_info (rs6000_stack_t *info)
27162 info->world_save_p = 1;
27163 info->world_save_p
27164 = (WORLD_SAVE_P (info)
27165 && DEFAULT_ABI == ABI_DARWIN
27166 && !cfun->has_nonlocal_label
27167 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
27168 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
27169 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
27170 && info->cr_save_p);
27172 /* This will not work in conjunction with sibcalls. Make sure there
27173 are none. (This check is expensive, but seldom executed.) */
27174 if (WORLD_SAVE_P (info))
27176 rtx_insn *insn;
27177 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
27178 if (CALL_P (insn) && SIBLING_CALL_P (insn))
27180 info->world_save_p = 0;
27181 break;
27185 if (WORLD_SAVE_P (info))
27187 /* Even if we're not touching VRsave, make sure there's room on the
27188 stack for it, if it looks like we're calling SAVE_WORLD, which
27189 will attempt to save it. */
27190 info->vrsave_size = 4;
27192 /* If we are going to save the world, we need to save the link register too. */
27193 info->lr_save_p = 1;
27195 /* "Save" the VRsave register too if we're saving the world. */
27196 if (info->vrsave_mask == 0)
27197 info->vrsave_mask = compute_vrsave_mask ();
27199 /* Because the Darwin register save/restore routines only handle
27200 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27201 check. */
27202 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
27203 && (info->first_altivec_reg_save
27204 >= FIRST_SAVED_ALTIVEC_REGNO));
27207 return;
27211 static void
27212 is_altivec_return_reg (rtx reg, void *xyes)
27214 bool *yes = (bool *) xyes;
27215 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
27216 *yes = true;
27220 /* Return whether REG is a global user reg or has been specified by
27221 -ffixed-REG. We should not restore these, and so cannot use
27222 lmw or out-of-line restore functions if there are any. We also
27223 can't save them (well, emit frame notes for them), because frame
27224 unwinding during exception handling will restore saved registers. */
27226 static bool
27227 fixed_reg_p (int reg)
27229 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27230 backend sets it, overriding anything the user might have given. */
27231 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
27232 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
27233 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27234 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
27235 return false;
27237 return fixed_regs[reg];
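/* For example, compiling with -ffixed-r14 makes r14 "user fixed": the
   predicate above then keeps it out of stmw/lmw ranges and out of the
   out-of-line save/restore footprint, and no frame notes are emitted
   for it.  */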
27240 /* Determine the strategy for saving/restoring registers. */
27242 enum {
27243 SAVE_MULTIPLE = 0x1,
27244 SAVE_INLINE_GPRS = 0x2,
27245 SAVE_INLINE_FPRS = 0x4,
27246 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
27247 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
27248 SAVE_INLINE_VRS = 0x20,
27249 REST_MULTIPLE = 0x100,
27250 REST_INLINE_GPRS = 0x200,
27251 REST_INLINE_FPRS = 0x400,
27252 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
27253 REST_INLINE_VRS = 0x1000
27256 static int
27257 rs6000_savres_strategy (rs6000_stack_t *info,
27258 bool using_static_chain_p)
27260 int strategy = 0;
27262 /* Select between in-line and out-of-line save and restore of regs.
27263 First, all the obvious cases where we don't use out-of-line. */
27264 if (crtl->calls_eh_return
27265 || cfun->machine->ra_need_lr)
27266 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
27267 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
27268 | SAVE_INLINE_VRS | REST_INLINE_VRS);
27270 if (info->first_gp_reg_save == 32)
27271 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27273 if (info->first_fp_reg_save == 64
27274 /* The out-of-line FP routines use double-precision stores;
27275 we can't use those routines if we don't have such stores. */
27276 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
27277 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27279 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
27280 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27282 /* Define cutoff for using out-of-line functions to save registers. */
27283 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
27285 if (!optimize_size)
27287 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27288 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27289 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27291 else
27293 /* Prefer out-of-line restore if it will exit. */
27294 if (info->first_fp_reg_save > 61)
27295 strategy |= SAVE_INLINE_FPRS;
27296 if (info->first_gp_reg_save > 29)
27298 if (info->first_fp_reg_save == 64)
27299 strategy |= SAVE_INLINE_GPRS;
27300 else
27301 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27303 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
27304 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27307 else if (DEFAULT_ABI == ABI_DARWIN)
27309 if (info->first_fp_reg_save > 60)
27310 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27311 if (info->first_gp_reg_save > 29)
27312 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27313 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27315 else
27317 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27318 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
27319 || info->first_fp_reg_save > 61)
27320 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27321 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27322 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27325 /* Don't bother to try to save things out-of-line if r11 is occupied
27326 by the static chain. It would require too much fiddling and the
27327 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
27328 pointer on Darwin, and AIX uses r1 or r12. */
27329 if (using_static_chain_p
27330 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27331 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
27332 | SAVE_INLINE_GPRS
27333 | SAVE_INLINE_VRS);
27335 /* Saving CR interferes with the exit routines used on the SPE, so
27336 just punt here. */
27337 if (TARGET_SPE_ABI
27338 && info->spe_64bit_regs_used
27339 && info->cr_save_p)
27340 strategy |= REST_INLINE_GPRS;
27342 /* We can only use the out-of-line routines to restore fprs if we've
27343 saved all the registers from first_fp_reg_save in the prologue.
27344 Otherwise, we risk loading garbage. Of course, if we have saved
27345 out-of-line then we know we haven't skipped any fprs. */
27346 if ((strategy & SAVE_INLINE_FPRS)
27347 && !(strategy & REST_INLINE_FPRS))
27349 int i;
27351 for (i = info->first_fp_reg_save; i < 64; i++)
27352 if (fixed_regs[i] || !save_reg_p (i))
27354 strategy |= REST_INLINE_FPRS;
27355 break;
27359 /* Similarly, for altivec regs. */
27360 if ((strategy & SAVE_INLINE_VRS)
27361 && !(strategy & REST_INLINE_VRS))
27363 int i;
27365 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
27366 if (fixed_regs[i] || !save_reg_p (i))
27368 strategy |= REST_INLINE_VRS;
27369 break;
27373 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27374 saved is an out-of-line save or restore. Set up the value for
27375 the next test (excluding out-of-line gprs). */
27376 bool lr_save_p = (info->lr_save_p
27377 || !(strategy & SAVE_INLINE_FPRS)
27378 || !(strategy & SAVE_INLINE_VRS)
27379 || !(strategy & REST_INLINE_FPRS)
27380 || !(strategy & REST_INLINE_VRS));
27382 if (TARGET_MULTIPLE
27383 && !TARGET_POWERPC64
27384 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
27385 && info->first_gp_reg_save < 31
27386 && !(flag_shrink_wrap
27387 && flag_shrink_wrap_separate
27388 && optimize_function_for_speed_p (cfun)))
27390 /* Prefer store multiple for saves over out-of-line routines,
27391 since the store-multiple instruction will always be smaller. */
27392 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
27394 /* The situation is more complicated with load multiple. We'd
27395 prefer to use the out-of-line routines for restores, since the
27396 "exit" out-of-line routines can handle the restore of LR and the
27397 frame teardown. However, it doesn't make sense to use the
27398 out-of-line routine if that is the only reason we'd need to save
27399 LR, and we can't use the "exit" out-of-line gpr restore if we
27400 have saved some fprs; in those cases it is advantageous to use
27401 load multiple when available. */
27402 if (info->first_fp_reg_save != 64 || !lr_save_p)
27403 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27406 /* Using the "exit" out-of-line routine does not improve code size
27407 if using it would require lr to be saved and if only saving one
27408 or two gprs. */
27409 else if (!lr_save_p && info->first_gp_reg_save > 29)
27410 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27412 /* We can only use load multiple or the out-of-line routines to
27413 restore gprs if we've saved all the registers from
27414 first_gp_reg_save. Otherwise, we risk loading garbage.
27415 Of course, if we have saved out-of-line or used stmw then we know
27416 we haven't skipped any gprs. */
27417 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27418 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27420 int i;
27422 for (i = info->first_gp_reg_save; i < 32; i++)
27423 if (fixed_reg_p (i) || !save_reg_p (i))
27425 strategy |= REST_INLINE_GPRS;
27426 strategy &= ~REST_MULTIPLE;
27427 break;
27431 if (TARGET_ELF && TARGET_64BIT)
27433 if (!(strategy & SAVE_INLINE_FPRS))
27434 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27435 else if (!(strategy & SAVE_INLINE_GPRS)
27436 && info->first_fp_reg_save == 64)
27437 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27439 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27440 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27442 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27443 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27445 return strategy;
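/* Standalone illustration (flag values copied from the enum above) of
   how the strategy mask composes and is later consumed: a gap in the
   saved-register range forces the restore back to one-at-a-time.  */
#include <stdio.h>

enum { SAVE_MULTIPLE = 0x1, SAVE_INLINE_GPRS = 0x2,
       REST_MULTIPLE = 0x100, REST_INLINE_GPRS = 0x200 };

int main (void)
{
  int strategy = SAVE_INLINE_GPRS | SAVE_MULTIPLE | REST_MULTIPLE;
  int gap_in_saved_regs = 1;       /* e.g. a -ffixed- reg in the range */

  if (gap_in_saved_regs)
    {
      strategy |= REST_INLINE_GPRS;   /* restore registers one by one */
      strategy &= ~REST_MULTIPLE;     /* lmw would load garbage */
    }
  printf ("use lmw? %s\n", (strategy & REST_MULTIPLE) ? "yes" : "no");
  return 0;
}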
27448 /* Calculate the stack information for the current function. This is
27449 complicated by having two separate calling sequences, the AIX calling
27450 sequence and the V.4 calling sequence.
27452 AIX (and Darwin/Mac OS X) stack frames look like:
27453 32-bit 64-bit
27454 SP----> +---------------------------------------+
27455 | back chain to caller | 0 0
27456 +---------------------------------------+
27457 | saved CR | 4 8 (8-11)
27458 +---------------------------------------+
27459 | saved LR | 8 16
27460 +---------------------------------------+
27461 | reserved for compilers | 12 24
27462 +---------------------------------------+
27463 | reserved for binders | 16 32
27464 +---------------------------------------+
27465 | saved TOC pointer | 20 40
27466 +---------------------------------------+
27467 | Parameter save area (+padding*) (P) | 24 48
27468 +---------------------------------------+
27469 | Alloca space (A) | 24+P etc.
27470 +---------------------------------------+
27471 | Local variable space (L) | 24+P+A
27472 +---------------------------------------+
27473 | Float/int conversion temporary (X) | 24+P+A+L
27474 +---------------------------------------+
27475 | Save area for AltiVec registers (W) | 24+P+A+L+X
27476 +---------------------------------------+
27477 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27478 +---------------------------------------+
27479 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27480 +---------------------------------------+
27481 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
27482 +---------------------------------------+
27483 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
27484 +---------------------------------------+
27485 old SP->| back chain to caller's caller |
27486 +---------------------------------------+
27488 * If the alloca area is present, the parameter save area is
27489 padded so that the former starts 16-byte aligned.
27491 The required alignment for AIX configurations is two words (i.e., 8
27492 or 16 bytes).
27494 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27496 SP----> +---------------------------------------+
27497 | Back chain to caller | 0
27498 +---------------------------------------+
27499 | Save area for CR | 8
27500 +---------------------------------------+
27501 | Saved LR | 16
27502 +---------------------------------------+
27503 | Saved TOC pointer | 24
27504 +---------------------------------------+
27505 | Parameter save area (+padding*) (P) | 32
27506 +---------------------------------------+
27507 | Alloca space (A) | 32+P
27508 +---------------------------------------+
27509 | Local variable space (L) | 32+P+A
27510 +---------------------------------------+
27511 | Save area for AltiVec registers (W) | 32+P+A+L
27512 +---------------------------------------+
27513 | AltiVec alignment padding (Y) | 32+P+A+L+W
27514 +---------------------------------------+
27515 | Save area for GP registers (G) | 32+P+A+L+W+Y
27516 +---------------------------------------+
27517 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27518 +---------------------------------------+
27519 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27520 +---------------------------------------+
27522 * If the alloca area is present, the parameter save area is
27523 padded so that the former starts 16-byte aligned.
27525 V.4 stack frames look like:
27527 SP----> +---------------------------------------+
27528 | back chain to caller | 0
27529 +---------------------------------------+
27530 | caller's saved LR | 4
27531 +---------------------------------------+
27532 | Parameter save area (+padding*) (P) | 8
27533 +---------------------------------------+
27534 | Alloca space (A) | 8+P
27535 +---------------------------------------+
27536 | Varargs save area (V) | 8+P+A
27537 +---------------------------------------+
27538 | Local variable space (L) | 8+P+A+V
27539 +---------------------------------------+
27540 | Float/int conversion temporary (X) | 8+P+A+V+L
27541 +---------------------------------------+
27542 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27543 +---------------------------------------+
27544 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27545 +---------------------------------------+
27546 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27547 +---------------------------------------+
27548 | SPE: area for 64-bit GP registers |
27549 +---------------------------------------+
27550 | SPE alignment padding |
27551 +---------------------------------------+
27552 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27553 +---------------------------------------+
27554 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27555 +---------------------------------------+
27556 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27557 +---------------------------------------+
27558 old SP->| back chain to caller's caller |
27559 +---------------------------------------+
27561 * If the alloca area is present and the required alignment is
27562 16 bytes, the parameter save area is padded so that the
27563 alloca area starts 16-byte aligned.
27565 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27566 given. (But note below and in sysv4.h that we require only 8 and
27567 may round up the size of our stack frame anyway. The historical
27568 reason is early versions of powerpc-linux which didn't properly
27569 align the stack at program startup. A happy side-effect is that
27570 -mno-eabi libraries can be used with -meabi programs.)
27572 The EABI configuration defaults to the V.4 layout. However,
27573 the stack alignment requirements may differ. If -mno-eabi is not
27574 given, the required stack alignment is 8 bytes; if -mno-eabi is
27575 given, the required alignment is 16 bytes. (But see V.4 comment
27576 above.) */
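/* A worked instance of the 32-bit AIX layout above, with illustrative
   values only: a 64-byte parameter save area, no alloca, 48 bytes of
   locals and an 8-byte conversion temporary put the AltiVec save area
   (W) at SP + 24+P+A+L+X.  */
#include <stdio.h>

int main (void)
{
  int P = 64, A = 0, L = 48, X = 8;
  printf ("W starts at SP+%d\n", 24 + P + A + L + X);   /* SP+144 */
  return 0;
}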
27578 #ifndef ABI_STACK_BOUNDARY
27579 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27580 #endif
27582 static rs6000_stack_t *
27583 rs6000_stack_info (void)
27585 /* We should never be called for thunks; we are not set up for that. */
27586 gcc_assert (!cfun->is_thunk);
27588 rs6000_stack_t *info = &stack_info;
27589 int reg_size = TARGET_32BIT ? 4 : 8;
27590 int ehrd_size;
27591 int ehcr_size;
27592 int save_align;
27593 int first_gp;
27594 HOST_WIDE_INT non_fixed_size;
27595 bool using_static_chain_p;
27597 if (reload_completed && info->reload_completed)
27598 return info;
27600 memset (info, 0, sizeof (*info));
27601 info->reload_completed = reload_completed;
27603 if (TARGET_SPE)
27605 /* Cache value so we don't rescan instruction chain over and over. */
27606 if (cfun->machine->spe_insn_chain_scanned_p == 0)
27607 cfun->machine->spe_insn_chain_scanned_p
27608 = spe_func_has_64bit_regs_p () + 1;
27609 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27612 /* Select which calling sequence. */
27613 info->abi = DEFAULT_ABI;
27615 /* Calculate which registers need to be saved & save area size. */
27616 info->first_gp_reg_save = first_reg_to_save ();
27617 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27618 even if it currently looks like we won't. Reload may need it to
27619 get at a constant; if so, it will have already created a constant
27620 pool entry for it. */
27621 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27622 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27623 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27624 && crtl->uses_const_pool
27625 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27626 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27627 else
27628 first_gp = info->first_gp_reg_save;
27630 info->gp_size = reg_size * (32 - first_gp);
27632 /* For the SPE, we have an additional upper 32-bits on each GPR.
27633 Ideally we should save the entire 64-bits only when the upper
27634 half is used in SIMD instructions. Since we only record
27635 registers live (not the size they are used in), this proves
27636 difficult because we'd have to traverse the instruction chain at
27637 the right time, taking reload into account. This is a real pain,
27638 so we opt to always save the GPRs in 64-bits if even one register
27639 gets used in 64-bits. Otherwise, all the registers in the frame
27640 get saved in 32-bits.
27642 So, when we save all GPRs (except the SP) in 64-bits, the
27643 traditional GP save area will be empty. */
27644 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27645 info->gp_size = 0;
27647 info->first_fp_reg_save = first_fp_reg_to_save ();
27648 info->fp_size = 8 * (64 - info->first_fp_reg_save);
27650 info->first_altivec_reg_save = first_altivec_reg_to_save ();
27651 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27652 - info->first_altivec_reg_save);
27654 /* Does this function call anything? */
27655 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27657 /* Determine if we need to save the condition code registers. */
27658 if (save_reg_p (CR2_REGNO)
27659 || save_reg_p (CR3_REGNO)
27660 || save_reg_p (CR4_REGNO))
27662 info->cr_save_p = 1;
27663 if (DEFAULT_ABI == ABI_V4)
27664 info->cr_size = reg_size;
27667 /* If the current function calls __builtin_eh_return, then we need
27668 to allocate stack space for registers that will hold data for
27669 the exception handler. */
27670 if (crtl->calls_eh_return)
27672 unsigned int i;
27673 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27674 continue;
27676 /* SPE saves EH registers in 64-bits. */
27677 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27678 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27680 else
27681 ehrd_size = 0;
27683 /* In the ELFv2 ABI, we also need to allocate space for separate
27684 CR field save areas if the function calls __builtin_eh_return. */
27685 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27687 /* This hard-codes that we have three call-saved CR fields. */
27688 ehcr_size = 3 * reg_size;
27689 /* We do *not* use the regular CR save mechanism. */
27690 info->cr_save_p = 0;
27692 else
27693 ehcr_size = 0;
27695 /* Determine various sizes. */
27696 info->reg_size = reg_size;
27697 info->fixed_size = RS6000_SAVE_AREA;
27698 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
27699 if (cfun->calls_alloca)
27700 info->parm_size =
27701 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27702 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27703 else
27704 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
27705 TARGET_ALTIVEC ? 16 : 8);
27706 if (FRAME_GROWS_DOWNWARD)
27707 info->vars_size
27708 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27709 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27710 - (info->fixed_size + info->vars_size + info->parm_size);
27712 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27713 info->spe_gp_size = 8 * (32 - first_gp);
27715 if (TARGET_ALTIVEC_ABI)
27716 info->vrsave_mask = compute_vrsave_mask ();
27718 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27719 info->vrsave_size = 4;
27721 compute_save_world_info (info);
27723 /* Calculate the offsets. */
27724 switch (DEFAULT_ABI)
27726 case ABI_NONE:
27727 default:
27728 gcc_unreachable ();
27730 case ABI_AIX:
27731 case ABI_ELFv2:
27732 case ABI_DARWIN:
27733 info->fp_save_offset = -info->fp_size;
27734 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27736 if (TARGET_ALTIVEC_ABI)
27738 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27740 /* Align stack so vector save area is on a quadword boundary.
27741 The padding goes above the vectors. */
27742 if (info->altivec_size != 0)
27743 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
27745 info->altivec_save_offset = info->vrsave_save_offset
27746 - info->altivec_padding_size
27747 - info->altivec_size;
27748 gcc_assert (info->altivec_size == 0
27749 || info->altivec_save_offset % 16 == 0);
27751 /* Adjust for AltiVec case. */
27752 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27754 else
27755 info->ehrd_offset = info->gp_save_offset - ehrd_size;
27757 info->ehcr_offset = info->ehrd_offset - ehcr_size;
27758 info->cr_save_offset = reg_size; /* first word when 64-bit. */
27759 info->lr_save_offset = 2*reg_size;
27760 break;
27762 case ABI_V4:
27763 info->fp_save_offset = -info->fp_size;
27764 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27765 info->cr_save_offset = info->gp_save_offset - info->cr_size;
27767 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27769 /* Align stack so SPE GPR save area is aligned on a
27770 double-word boundary. */
27771 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27772 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27773 else
27774 info->spe_padding_size = 0;
27776 info->spe_gp_save_offset = info->cr_save_offset
27777 - info->spe_padding_size
27778 - info->spe_gp_size;
27780 /* Adjust for SPE case. */
27781 info->ehrd_offset = info->spe_gp_save_offset;
27783 else if (TARGET_ALTIVEC_ABI)
27785 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27787 /* Align stack so vector save area is on a quadword boundary. */
27788 if (info->altivec_size != 0)
27789 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27791 info->altivec_save_offset = info->vrsave_save_offset
27792 - info->altivec_padding_size
27793 - info->altivec_size;
27795 /* Adjust for AltiVec case. */
27796 info->ehrd_offset = info->altivec_save_offset;
27798 else
27799 info->ehrd_offset = info->cr_save_offset;
27801 info->ehrd_offset -= ehrd_size;
27802 info->lr_save_offset = reg_size;
27805 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27806 info->save_size = RS6000_ALIGN (info->fp_size
27807 + info->gp_size
27808 + info->altivec_size
27809 + info->altivec_padding_size
27810 + info->spe_gp_size
27811 + info->spe_padding_size
27812 + ehrd_size
27813 + ehcr_size
27814 + info->cr_size
27815 + info->vrsave_size,
27816 save_align);
27818 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27820 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27821 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
27823 /* Determine if we need to save the link register. */
27824 if (info->calls_p
27825 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27826 && crtl->profile
27827 && !TARGET_PROFILE_KERNEL)
27828 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27829 #ifdef TARGET_RELOCATABLE
27830 || (DEFAULT_ABI == ABI_V4
27831 && (TARGET_RELOCATABLE || flag_pic > 1)
27832 && !constant_pool_empty_p ())
27833 #endif
27834 || rs6000_ra_ever_killed ())
27835 info->lr_save_p = 1;
27837 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27838 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27839 && call_used_regs[STATIC_CHAIN_REGNUM]);
27840 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27842 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27843 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27844 || !(info->savres_strategy & SAVE_INLINE_VRS)
27845 || !(info->savres_strategy & REST_INLINE_GPRS)
27846 || !(info->savres_strategy & REST_INLINE_FPRS)
27847 || !(info->savres_strategy & REST_INLINE_VRS))
27848 info->lr_save_p = 1;
27850 if (info->lr_save_p)
27851 df_set_regs_ever_live (LR_REGNO, true);
27853 /* Determine if we need to allocate any stack frame:
27855 For AIX we need to push the stack if a frame pointer is needed
27856 (because the stack might be dynamically adjusted), if we are
27857 debugging, if we make calls, or if the sum of fp_save, gp_save,
27858 and local variables is more than the space needed to save all
27859 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27860 + 18*8 = 288 (GPR13 reserved).
27862 For V.4 we don't have the stack cushion that AIX uses, but assume
27863 that the debugger can handle stackless frames. */
27865 if (info->calls_p)
27866 info->push_p = 1;
27868 else if (DEFAULT_ABI == ABI_V4)
27869 info->push_p = non_fixed_size != 0;
27871 else if (frame_pointer_needed)
27872 info->push_p = 1;
27874 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27875 info->push_p = 1;
27877 else
27878 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27880 return info;
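/* RS6000_ALIGN above is assumed to be the usual round-up-to-multiple
   macro; a standalone sketch of how the individual save areas get
   padded up to save_align (16 here, as with the AltiVec ABI):  */
#include <stdio.h>
#define ALIGN_UP(n, a) (((n) + (a) - 1) & ~((a) - 1))

int main (void)
{
  int gp_size = 4 * (32 - 29);   /* r29..r31 saved: 12 bytes */
  int fp_size = 8 * (64 - 62);   /* f30..f31 saved: 16 bytes */
  printf ("save_size = %d\n", ALIGN_UP (gp_size + fp_size, 16)); /* 32 */
  return 0;
}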
27883 /* Return true if the current function uses any GPRs in 64-bit SIMD
27884 mode. */
27886 static bool
27887 spe_func_has_64bit_regs_p (void)
27889 rtx_insn *insns, *insn;
27891 /* Functions that save and restore all the call-saved registers will
27892 need to save/restore the registers in 64-bits. */
27893 if (crtl->calls_eh_return
27894 || cfun->calls_setjmp
27895 || crtl->has_nonlocal_goto)
27896 return true;
27898 insns = get_insns ();
27900 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27902 if (INSN_P (insn))
27904 rtx i;
27906 /* FIXME: This should be implemented with attributes...
27908 (set_attr "spe64" "true")....then,
27909 if (get_spe64(insn)) return true;
27911 It's the only reliable way to do the stuff below. */
27913 i = PATTERN (insn);
27914 if (GET_CODE (i) == SET)
27916 machine_mode mode = GET_MODE (SET_SRC (i));
27918 if (SPE_VECTOR_MODE (mode))
27919 return true;
27920 if (TARGET_E500_DOUBLE
27921 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27922 return true;
27927 return false;
27930 static void
27931 debug_stack_info (rs6000_stack_t *info)
27933 const char *abi_string;
27935 if (! info)
27936 info = rs6000_stack_info ();
27938 fprintf (stderr, "\nStack information for function %s:\n",
27939 ((current_function_decl && DECL_NAME (current_function_decl))
27940 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27941 : "<unknown>"));
27943 switch (info->abi)
27945 default: abi_string = "Unknown"; break;
27946 case ABI_NONE: abi_string = "NONE"; break;
27947 case ABI_AIX: abi_string = "AIX"; break;
27948 case ABI_ELFv2: abi_string = "ELFv2"; break;
27949 case ABI_DARWIN: abi_string = "Darwin"; break;
27950 case ABI_V4: abi_string = "V.4"; break;
27953 fprintf (stderr, "\tABI = %5s\n", abi_string);
27955 if (TARGET_ALTIVEC_ABI)
27956 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27958 if (TARGET_SPE_ABI)
27959 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27961 if (info->first_gp_reg_save != 32)
27962 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27964 if (info->first_fp_reg_save != 64)
27965 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27967 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27968 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27969 info->first_altivec_reg_save);
27971 if (info->lr_save_p)
27972 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27974 if (info->cr_save_p)
27975 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27977 if (info->vrsave_mask)
27978 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27980 if (info->push_p)
27981 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27983 if (info->calls_p)
27984 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27986 if (info->gp_size)
27987 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27989 if (info->fp_size)
27990 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27992 if (info->altivec_size)
27993 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27994 info->altivec_save_offset);
27996 if (info->spe_gp_size)
27997 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27998 info->spe_gp_save_offset);
28000 if (info->vrsave_size)
28001 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
28002 info->vrsave_save_offset);
28004 if (info->lr_save_p)
28005 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
28007 if (info->cr_save_p)
28008 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
28010 if (info->varargs_save_offset)
28011 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
28013 if (info->total_size)
28014 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28015 info->total_size);
28017 if (info->vars_size)
28018 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
28019 info->vars_size);
28021 if (info->parm_size)
28022 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
28024 if (info->fixed_size)
28025 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
28027 if (info->gp_size)
28028 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
28030 if (info->spe_gp_size)
28031 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
28033 if (info->fp_size)
28034 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
28036 if (info->altivec_size)
28037 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
28039 if (info->vrsave_size)
28040 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
28042 if (info->altivec_padding_size)
28043 fprintf (stderr, "\taltivec_padding_size= %5d\n",
28044 info->altivec_padding_size);
28046 if (info->spe_padding_size)
28047 fprintf (stderr, "\tspe_padding_size = %5d\n",
28048 info->spe_padding_size);
28050 if (info->cr_size)
28051 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
28053 if (info->save_size)
28054 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
28056 if (info->reg_size != 4)
28057 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
28059 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
28061 fprintf (stderr, "\n");
28064 rtx
28065 rs6000_return_addr (int count, rtx frame)
28067 /* Currently we don't optimize very well between prolog and body
28068 code, and for PIC code the result can actually be quite bad, so
28069 don't try to be too clever here. */
28070 if (count != 0
28071 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
28073 cfun->machine->ra_needs_full_frame = 1;
28075 return
28076 gen_rtx_MEM
28077 (Pmode,
28078 memory_address
28079 (Pmode,
28080 plus_constant (Pmode,
28081 copy_to_reg
28082 (gen_rtx_MEM (Pmode,
28083 memory_address (Pmode, frame))),
28084 RETURN_ADDRESS_OFFSET)));
28087 cfun->machine->ra_need_lr = 1;
28088 return get_hard_reg_initial_val (Pmode, LR_REGNO);
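/* This function is the expansion behind __builtin_return_address on
   this port: COUNT == 0 normally comes straight from LR's value on
   entry, while non-zero counts (or PIC on V.4/Darwin) walk the back
   chain in memory, which is why ra_needs_full_frame gets set.  Usage:  */
#include <stdio.h>

__attribute__ ((noinline)) static void who_called_me (void)
{
  printf ("called from %p\n", __builtin_return_address (0));
}

int main (void)
{
  who_called_me ();
  return 0;
}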
28091 /* Say whether a function is a candidate for sibcall handling or not. */
28093 static bool
28094 rs6000_function_ok_for_sibcall (tree decl, tree exp)
28096 tree fntype;
28098 if (decl)
28099 fntype = TREE_TYPE (decl);
28100 else
28101 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
28103 /* We can't do it if the called function has more vector parameters
28104 than the current function; there's nowhere to put the VRsave code. */
28105 if (TARGET_ALTIVEC_ABI
28106 && TARGET_ALTIVEC_VRSAVE
28107 && !(decl && decl == current_function_decl))
28109 function_args_iterator args_iter;
28110 tree type;
28111 int nvreg = 0;
28113 /* Functions with vector parameters are required to have a
28114 prototype, so the argument type info must be available
28115 here. */
28116 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
28117 if (TREE_CODE (type) == VECTOR_TYPE
28118 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28119 nvreg++;
28121 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
28122 if (TREE_CODE (type) == VECTOR_TYPE
28123 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28124 nvreg--;
28126 if (nvreg > 0)
28127 return false;
28130 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28131 functions, because the callee may have a different TOC pointer to
28132 the caller and there's no way to ensure we restore the TOC when
28133 we return. With the secure-plt SYSV ABI we can't make non-local
28134 calls when -fpic/PIC because the plt call stubs use r30. */
28135 if (DEFAULT_ABI == ABI_DARWIN
28136 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28137 && decl
28138 && !DECL_EXTERNAL (decl)
28139 && !DECL_WEAK (decl)
28140 && (*targetm.binds_local_p) (decl))
28141 || (DEFAULT_ABI == ABI_V4
28142 && (!TARGET_SECURE_PLT
28143 || !flag_pic
28144 || (decl
28145 && (*targetm.binds_local_p) (decl)))))
28147 tree attr_list = TYPE_ATTRIBUTES (fntype);
28149 if (!lookup_attribute ("longcall", attr_list)
28150 || lookup_attribute ("shortcall", attr_list))
28151 return true;
28154 return false;
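/* Example of the attribute check above: a "longcall" callee is
   rejected for sibcall optimization (the call may need a long-branch
   stub) unless "shortcall" overrides it.  */
__attribute__ ((longcall)) static void far_away (void)
{
}

void tail_caller (void)
{
  far_away ();   /* not turned into a sibcall */
}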
28157 static int
28158 rs6000_ra_ever_killed (void)
28160 rtx_insn *top;
28161 rtx reg;
28162 rtx_insn *insn;
28164 if (cfun->is_thunk)
28165 return 0;
28167 if (cfun->machine->lr_save_state)
28168 return cfun->machine->lr_save_state - 1;
28170 /* regs_ever_live has LR marked as used if any sibcalls are present,
28171 but this should not force saving and restoring in the
28172 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28173 clobbers LR, so that is inappropriate. */
28175 /* Also, the prologue can generate a store into LR that
28176 doesn't really count, like this:
28178 move LR->R0
28179 bcl to set PIC register
28180 move LR->R31
28181 move R0->LR
28183 When we're called from the epilogue, we need to avoid counting
28184 this as a store. */
28186 push_topmost_sequence ();
28187 top = get_insns ();
28188 pop_topmost_sequence ();
28189 reg = gen_rtx_REG (Pmode, LR_REGNO);
28191 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
28193 if (INSN_P (insn))
28195 if (CALL_P (insn))
28197 if (!SIBLING_CALL_P (insn))
28198 return 1;
28200 else if (find_regno_note (insn, REG_INC, LR_REGNO))
28201 return 1;
28202 else if (set_of (reg, insn) != NULL_RTX
28203 && !prologue_epilogue_contains (insn))
28204 return 1;
28207 return 0;
28210 /* Emit instructions needed to load the TOC register.
28211 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28212 a constant pool; or for SVR4 -fpic. */
28214 void
28215 rs6000_emit_load_toc_table (int fromprolog)
28217 rtx dest;
28218 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
28220 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
28222 char buf[30];
28223 rtx lab, tmp1, tmp2, got;
28225 lab = gen_label_rtx ();
28226 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
28227 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28228 if (flag_pic == 2)
28230 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28231 need_toc_init = 1;
28233 else
28234 got = rs6000_got_sym ();
28235 tmp1 = tmp2 = dest;
28236 if (!fromprolog)
28238 tmp1 = gen_reg_rtx (Pmode);
28239 tmp2 = gen_reg_rtx (Pmode);
28241 emit_insn (gen_load_toc_v4_PIC_1 (lab));
28242 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
28243 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
28244 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
28246 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
28248 emit_insn (gen_load_toc_v4_pic_si ());
28249 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28251 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
28253 char buf[30];
28254 rtx temp0 = (fromprolog
28255 ? gen_rtx_REG (Pmode, 0)
28256 : gen_reg_rtx (Pmode));
28258 if (fromprolog)
28260 rtx symF, symL;
28262 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28263 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28265 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28266 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28268 emit_insn (gen_load_toc_v4_PIC_1 (symF));
28269 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28270 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
28272 else
28274 rtx tocsym, lab;
28276 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28277 need_toc_init = 1;
28278 lab = gen_label_rtx ();
28279 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
28280 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28281 if (TARGET_LINK_STACK)
28282 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
28283 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
28285 emit_insn (gen_addsi3 (dest, temp0, dest));
28287 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
28289 /* This is for AIX code running in non-PIC ELF32. */
28290 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28292 need_toc_init = 1;
28293 emit_insn (gen_elf_high (dest, realsym));
28294 emit_insn (gen_elf_low (dest, dest, realsym));
28296 else
28298 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28300 if (TARGET_32BIT)
28301 emit_insn (gen_load_toc_aix_si (dest));
28302 else
28303 emit_insn (gen_load_toc_aix_di (dest));
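/* What this buys at the source level: on TOC-based ABIs every global
   address is materialized relative to the TOC register (r2, or r30 for
   V.4 PIC), e.g. roughly "addis 9,2,var@toc@ha; addi 9,9,var@toc@l" on
   ELFv2.  Plain C like the following exercises the sequences emitted
   above:  */
static int var = 42;

int *get_var_address (void)
{
  return &var;
}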
28307 /* Emit instructions to restore the link register after determining where
28308 its value has been stored. */
28310 void
28311 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
28313 rs6000_stack_t *info = rs6000_stack_info ();
28314 rtx operands[2];
28316 operands[0] = source;
28317 operands[1] = scratch;
28319 if (info->lr_save_p)
28321 rtx frame_rtx = stack_pointer_rtx;
28322 HOST_WIDE_INT sp_offset = 0;
28323 rtx tmp;
28325 if (frame_pointer_needed
28326 || cfun->calls_alloca
28327 || info->total_size > 32767)
28329 tmp = gen_frame_mem (Pmode, frame_rtx);
28330 emit_move_insn (operands[1], tmp);
28331 frame_rtx = operands[1];
28333 else if (info->push_p)
28334 sp_offset = info->total_size;
28336 tmp = plus_constant (Pmode, frame_rtx,
28337 info->lr_save_offset + sp_offset);
28338 tmp = gen_frame_mem (Pmode, tmp);
28339 emit_move_insn (tmp, operands[0]);
28341 else
28342 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
28344 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28345 state of lr_save_p so any change from here on would be a bug. In
28346 particular, stop rs6000_ra_ever_killed from considering the SET
28347 of lr we may have added just above. */
28348 cfun->machine->lr_save_state = info->lr_save_p + 1;
28351 static GTY(()) alias_set_type set = -1;
28353 alias_set_type
28354 get_TOC_alias_set (void)
28356 if (set == -1)
28357 set = new_alias_set ();
28358 return set;
28361 /* This returns nonzero if the current function uses the TOC. This is
28362 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28363 is generated by the ABI_V4 load_toc_* patterns. */
28364 #if TARGET_ELF
28365 static int
28366 uses_TOC (void)
28368 rtx_insn *insn;
28370 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
28371 if (INSN_P (insn))
28373 rtx pat = PATTERN (insn);
28374 int i;
28376 if (GET_CODE (pat) == PARALLEL)
28377 for (i = 0; i < XVECLEN (pat, 0); i++)
28379 rtx sub = XVECEXP (pat, 0, i);
28380 if (GET_CODE (sub) == USE)
28382 sub = XEXP (sub, 0);
28383 if (GET_CODE (sub) == UNSPEC
28384 && XINT (sub, 1) == UNSPEC_TOC)
28385 return 1;
28389 return 0;
28391 #endif
28393 rtx
28394 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28396 rtx tocrel, tocreg, hi;
28398 if (TARGET_DEBUG_ADDR)
28400 if (GET_CODE (symbol) == SYMBOL_REF)
28401 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28402 XSTR (symbol, 0));
28403 else
28405 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28406 GET_RTX_NAME (GET_CODE (symbol)));
28407 debug_rtx (symbol);
28411 if (!can_create_pseudo_p ())
28412 df_set_regs_ever_live (TOC_REGISTER, true);
28414 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28415 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28416 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28417 return tocrel;
28419 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28420 if (largetoc_reg != NULL)
28422 emit_move_insn (largetoc_reg, hi);
28423 hi = largetoc_reg;
28425 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
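/* The HIGH/LO_SUM pair above becomes the @ha/@l halves of an
   addis/addi (or addis/ld) sequence in the medium/large code models.
   Standalone sketch of the split: @ha adds 0x8000 first so that the
   sign-extended 16-bit low part reassembles exactly.  */
#include <stdio.h>

int main (void)
{
  int offset = 0x12FFF0;                 /* illustrative TOC offset */
  short lo = (short) (offset & 0xFFFF);  /* @l, sign-extended: -16  */
  int hi = (offset + 0x8000) >> 16;      /* @ha: 0x13               */
  printf ("(0x%X << 16) + (%d) = 0x%X\n", hi, lo, (hi << 16) + lo);
  return 0;
}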
28428 /* Issue assembly directives that create a reference to the given DWARF
28429 FRAME_TABLE_LABEL from the current function section. */
28430 void
28431 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28433 fprintf (asm_out_file, "\t.ref %s\n",
28434 (* targetm.strip_name_encoding) (frame_table_label));
28437 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28438 and the change to the stack pointer. */
28440 static void
28441 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28443 rtvec p;
28444 int i;
28445 rtx regs[3];
28447 i = 0;
28448 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28449 if (hard_frame_needed)
28450 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28451 if (!(REGNO (fp) == STACK_POINTER_REGNUM
28452 || (hard_frame_needed
28453 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28454 regs[i++] = fp;
28456 p = rtvec_alloc (i);
28457 while (--i >= 0)
28459 rtx mem = gen_frame_mem (BLKmode, regs[i]);
28460 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28463 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
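/* The tie acts as a scheduling and alias barrier: the BLKmode frame
   MEMs set above make later frame accesses appear to depend on the
   listed pointer registers, so register saves and restores cannot be
   moved across the stack-pointer (or frame-pointer) adjustment.  */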
28466 /* Emit the correct code for allocating stack space, as insns.
28467 If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF) in it.
28468 The generated code may use hard register 0 as a temporary. */
28470 static rtx_insn *
28471 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28473 rtx_insn *insn;
28474 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28475 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28476 rtx todec = gen_int_mode (-size, Pmode);
28477 rtx par, set, mem;
28479 if (INTVAL (todec) != -size)
28481 warning (0, "stack frame too large");
28482 emit_insn (gen_trap ());
28483 return 0;
28486 if (crtl->limit_stack)
28488 if (REG_P (stack_limit_rtx)
28489 && REGNO (stack_limit_rtx) > 1
28490 && REGNO (stack_limit_rtx) <= 31)
28492 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28493 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28494 const0_rtx));
28496 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28497 && TARGET_32BIT
28498 && DEFAULT_ABI == ABI_V4
28499 && !flag_pic)
28501 rtx toload = gen_rtx_CONST (VOIDmode,
28502 gen_rtx_PLUS (Pmode,
28503 stack_limit_rtx,
28504 GEN_INT (size)));
28506 emit_insn (gen_elf_high (tmp_reg, toload));
28507 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28508 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28509 const0_rtx));
28511 else
28512 warning (0, "stack limit expression is not supported");
28515 if (copy_reg)
28517 if (copy_off != 0)
28518 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28519 else
28520 emit_move_insn (copy_reg, stack_reg);
28523 if (size > 32767)
28525 /* Need a note here so that try_split doesn't get confused. */
28526 if (get_last_insn () == NULL_RTX)
28527 emit_note (NOTE_INSN_DELETED);
28528 insn = emit_move_insn (tmp_reg, todec);
28529 try_split (PATTERN (insn), insn, 0);
28530 todec = tmp_reg;
28533 insn = emit_insn (TARGET_32BIT
28534 ? gen_movsi_update_stack (stack_reg, stack_reg,
28535 todec, stack_reg)
28536 : gen_movdi_di_update_stack (stack_reg, stack_reg,
28537 todec, stack_reg));
28538 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28539 it now and set the alias set/attributes. The above gen_*_update
28540 calls will generate a PARALLEL with the MEM set being the first
28541 operation. */
28542 par = PATTERN (insn);
28543 gcc_assert (GET_CODE (par) == PARALLEL);
28544 set = XVECEXP (par, 0, 0);
28545 gcc_assert (GET_CODE (set) == SET);
28546 mem = SET_DEST (set);
28547 gcc_assert (MEM_P (mem));
28548 MEM_NOTRAP_P (mem) = 1;
28549 set_mem_alias_set (mem, get_frame_alias_set ());
28551 RTX_FRAME_RELATED_P (insn) = 1;
28552 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28553 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28554 GEN_INT (-size))));
28555 return insn;
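/* The gen_mov*_update_stack patterns used above ultimately emit the
   atomic allocate-and-link instruction, e.g. for a small 32-bit frame
   roughly "stwu 1,-SIZE(1)": the back chain is stored and the stack
   pointer decremented in a single instruction, so the chain is always
   valid.  For SIZE > 32767 the amount is first built in r0, hence the
   try_split dance above.  */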
28558 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28560 #if PROBE_INTERVAL > 32768
28561 #error Cannot use indexed addressing mode for stack probing
28562 #endif
28564 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28565 inclusive. These are offsets from the current stack pointer. */
28567 static void
28568 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28570 /* See if we have a constant small number of probes to generate. If so,
28571 that's the easy case. */
28572 if (first + size <= 32768)
28574 HOST_WIDE_INT i;
28576 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28577 it exceeds SIZE. If only one probe is needed, this will not
28578 generate any code. Then probe at FIRST + SIZE. */
28579 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28580 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28581 -(first + i)));
28583 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28584 -(first + size)));
28587 /* Otherwise, do the same as above, but in a loop. Note that we must be
28588 extra careful with variables wrapping around because we might be at
28589 the very top (or the very bottom) of the address space and we have
28590 to be able to handle this case properly; in particular, we use an
28591 equality test for the loop condition. */
28592 else
28594 HOST_WIDE_INT rounded_size;
28595 rtx r12 = gen_rtx_REG (Pmode, 12);
28596 rtx r0 = gen_rtx_REG (Pmode, 0);
28598 /* Sanity check for the addressing mode we're going to use. */
28599 gcc_assert (first <= 32768);
28601 /* Step 1: round SIZE to the previous multiple of the interval. */
28603 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28606 /* Step 2: compute initial and final value of the loop counter. */
28608 /* TEST_ADDR = SP + FIRST. */
28609 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28610 -first)));
28612 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28613 if (rounded_size > 32768)
28615 emit_move_insn (r0, GEN_INT (-rounded_size));
28616 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28618 else
28619 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28620 -rounded_size)));
28623 /* Step 3: the loop
28627 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28628 probe at TEST_ADDR
28630 while (TEST_ADDR != LAST_ADDR)
28632 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28633 until it is equal to ROUNDED_SIZE. */
28635 if (TARGET_64BIT)
28636 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28637 else
28638 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28641 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28642 that SIZE is equal to ROUNDED_SIZE. */
28644 if (size != rounded_size)
28645 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
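/* Worked example of the constant case above (illustrative numbers,
   assuming the default 4096-byte PROBE_INTERVAL): first = 0,
   size = 10000 probes at sp-4096, sp-8192 and then sp-10000.  */
#include <stdio.h>

int main (void)
{
  long first = 0, size = 10000, interval = 4096, i;
  for (i = interval; i < size; i += interval)
    printf ("probe at sp%ld\n", -(first + i));
  printf ("probe at sp%ld\n", -(first + size));
  return 0;
}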
28649 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28650 absolute addresses. */
28652 const char *
28653 output_probe_stack_range (rtx reg1, rtx reg2)
28655 static int labelno = 0;
28656 char loop_lab[32];
28657 rtx xops[2];
28659 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28661 /* Loop. */
28662 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28664 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28665 xops[0] = reg1;
28666 xops[1] = GEN_INT (-PROBE_INTERVAL);
28667 output_asm_insn ("addi %0,%0,%1", xops);
28669 /* Probe at TEST_ADDR. */
28670 xops[1] = gen_rtx_REG (Pmode, 0);
28671 output_asm_insn ("stw %1,0(%0)", xops);
28673 /* Test if TEST_ADDR == LAST_ADDR. */
28674 xops[1] = reg2;
28675 if (TARGET_64BIT)
28676 output_asm_insn ("cmpd 0,%0,%1", xops);
28677 else
28678 output_asm_insn ("cmpw 0,%0,%1", xops);
28680 /* Branch. */
28681 fputs ("\tbne 0,", asm_out_file);
28682 assemble_name_raw (asm_out_file, loop_lab);
28683 fputc ('\n', asm_out_file);
28685 return "";
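/* With r12/r0 set up as in the caller above, the emitted 32-bit loop
   looks roughly like (label name and interval illustrative):
       .LPSRL0:
           addi 12,12,-4096      # TEST_ADDR -= PROBE_INTERVAL
           stw 0,0(12)           # probe the word at TEST_ADDR
           cmpw 0,12,0           # reached LAST_ADDR?
           bne 0,.LPSRL0
*/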
28688 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28689 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28690 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28691 deduce these equivalences by itself so it wasn't necessary to hold
28692 its hand so much. Don't be tempted to always supply d2_f_d_e with
28693 the actual cfa register, ie. r31 when we are using a hard frame
28694 pointer. That fails when saving regs off r1, and sched moves the
28695 r31 setup past the reg saves. */
28697 static rtx_insn *
28698 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28699 rtx reg2, rtx repl2)
28701 rtx repl;
28703 if (REGNO (reg) == STACK_POINTER_REGNUM)
28705 gcc_checking_assert (val == 0);
28706 repl = NULL_RTX;
28708 else
28709 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28710 GEN_INT (val));
28712 rtx pat = PATTERN (insn);
28713 if (!repl && !reg2)
28715 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28716 if (GET_CODE (pat) == PARALLEL)
28717 for (int i = 0; i < XVECLEN (pat, 0); i++)
28718 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28720 rtx set = XVECEXP (pat, 0, i);
28722 /* If this PARALLEL has been emitted for out-of-line
28723 register save functions, or store multiple, then omit
28724 eh_frame info for any user-defined global regs. If
28725 eh_frame info is supplied, frame unwinding will
28726 restore a user reg. */
28727 if (!REG_P (SET_SRC (set))
28728 || !fixed_reg_p (REGNO (SET_SRC (set))))
28729 RTX_FRAME_RELATED_P (set) = 1;
28731 RTX_FRAME_RELATED_P (insn) = 1;
28732 return insn;
28735 /* We expect that 'pat' is either a SET or a PARALLEL containing
28736 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28737 are important so they all have to be marked RTX_FRAME_RELATED_P.
28738 Call simplify_replace_rtx on the SETs rather than the whole insn
28739 so as to leave the other stuff alone (for example USE of r12). */
28741 set_used_flags (pat);
28742 if (GET_CODE (pat) == SET)
28744 if (repl)
28745 pat = simplify_replace_rtx (pat, reg, repl);
28746 if (reg2)
28747 pat = simplify_replace_rtx (pat, reg2, repl2);
28749 else if (GET_CODE (pat) == PARALLEL)
28751 pat = shallow_copy_rtx (pat);
28752 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28754 for (int i = 0; i < XVECLEN (pat, 0); i++)
28755 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28757 rtx set = XVECEXP (pat, 0, i);
28759 if (repl)
28760 set = simplify_replace_rtx (set, reg, repl);
28761 if (reg2)
28762 set = simplify_replace_rtx (set, reg2, repl2);
28763 XVECEXP (pat, 0, i) = set;
28765 /* Omit eh_frame info for any user-defined global regs. */
28766 if (!REG_P (SET_SRC (set))
28767 || !fixed_reg_p (REGNO (SET_SRC (set))))
28768 RTX_FRAME_RELATED_P (set) = 1;
28771 else
28772 gcc_unreachable ();
28774 RTX_FRAME_RELATED_P (insn) = 1;
28775 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28777 return insn;
28780 /* Returns an insn that has a vrsave set operation with the
28781 appropriate CLOBBERs. */
28783 static rtx
28784 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28786 int nclobs, i;
28787 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28788 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28790 clobs[0]
28791 = gen_rtx_SET (vrsave,
28792 gen_rtx_UNSPEC_VOLATILE (SImode,
28793 gen_rtvec (2, reg, vrsave),
28794 UNSPECV_SET_VRSAVE));
28796 nclobs = 1;
28798 /* We need to clobber the registers in the mask so the scheduler
28799 does not move sets to VRSAVE before sets of AltiVec registers.
28801 However, if the function receives nonlocal gotos, reload will set
28802 all call saved registers live. We will end up with:
28804 (set (reg 999) (mem))
28805 (parallel [ (set (reg vrsave) (unspec blah))
28806 (clobber (reg 999))])
28808 The clobber will cause the store into reg 999 to be dead, and
28809 flow will attempt to delete an epilogue insn. In this case, we
28810 need an unspec use/set of the register. */
28812 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28813 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28815 if (!epiloguep || call_used_regs [i])
28816 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28817 gen_rtx_REG (V4SImode, i));
28818 else
28820 rtx reg = gen_rtx_REG (V4SImode, i);
28822 clobs[nclobs++]
28823 = gen_rtx_SET (reg,
28824 gen_rtx_UNSPEC (V4SImode,
28825 gen_rtvec (1, reg), 27));
28829 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28831 for (i = 0; i < nclobs; ++i)
28832 XVECEXP (insn, 0, i) = clobs[i];
28834 return insn;
28837 static rtx
28838 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28840 rtx addr, mem;
28842 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28843 mem = gen_frame_mem (GET_MODE (reg), addr);
28844 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28847 static rtx
28848 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28850 return gen_frame_set (reg, frame_reg, offset, false);
28853 static rtx
28854 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28856 return gen_frame_set (reg, frame_reg, offset, true);
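/* Example: gen_frame_store (gen_rtx_REG (SImode, 31), sp, 8) builds
   (set (mem:SI (plus (reg 1) (const_int 8))) (reg:SI 31)), i.e. the
   RTL behind "stw 31,8(1)"; gen_frame_load is the mirror image used
   by the epilogue.  */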
28859 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28860 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28862 static rtx_insn *
28863 emit_frame_save (rtx frame_reg, machine_mode mode,
28864 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28866 rtx reg;
28868 /* Some cases that need register indexed addressing. */
28869 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28870 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28871 || (TARGET_E500_DOUBLE && mode == DFmode)
28872 || (TARGET_SPE_ABI
28873 && SPE_VECTOR_MODE (mode)
28874 && !SPE_CONST_OFFSET_OK (offset))));
28876 reg = gen_rtx_REG (mode, regno);
28877 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28878 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28879 NULL_RTX, NULL_RTX);
28882 /* Emit an offset memory reference suitable for a frame store, while
28883 converting to a valid addressing mode. */
28885 static rtx
28886 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28888 rtx int_rtx, offset_rtx;
28890 int_rtx = GEN_INT (offset);
28892 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28893 || (TARGET_E500_DOUBLE && mode == DFmode))
28895 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28896 emit_move_insn (offset_rtx, int_rtx);
28898 else
28899 offset_rtx = int_rtx;
28901 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28904 #ifndef TARGET_FIX_AND_CONTINUE
28905 #define TARGET_FIX_AND_CONTINUE 0
28906 #endif
28908 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
28909 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28910 #define LAST_SAVRES_REGISTER 31
28911 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28913 enum {
28914 SAVRES_LR = 0x1,
28915 SAVRES_SAVE = 0x2,
28916 SAVRES_REG = 0x0c,
28917 SAVRES_GPR = 0,
28918 SAVRES_FPR = 4,
28919 SAVRES_VR = 8
28922 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28924 /* Temporary holding space for an out-of-line register save/restore
28925 routine name. */
28926 static char savres_routine_name[30];
28928 /* Return the name for an out-of-line register save/restore routine.
28929 SEL encodes save vs. restore, the register class, and LR handling. */
28931 static char *
28932 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28934 const char *prefix = "";
28935 const char *suffix = "";
28937 /* Different targets are supposed to define
28938 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28939 routine name could be defined with:
28941 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28943 This is a nice idea in theory, but in practice, things are
28944 complicated in several ways:
28946 - ELF targets have save/restore routines for GPRs.
28948 - SPE targets use different prefixes for 32/64-bit registers, and
28949 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28951 - PPC64 ELF targets have routines for save/restore of GPRs that
28952 differ in what they do with the link register, so having a set
28953 prefix doesn't work. (We only use one of the save routines at
28954 the moment, though.)
- PPC32 ELF targets have "exit" versions of the restore routines
28957 that restore the link register and can save some extra space.
28958 These require an extra suffix. (There are also "tail" versions
28959 of the restore routines and "GOT" versions of the save routines,
28960 but we don't generate those at present. Same problems apply,
28961 though.)
28963 We deal with all this by synthesizing our own prefix/suffix and
28964 using that for the simple sprintf call shown above. */
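/* Illustrative examples of the synthesized names: on 32-bit V.4 a
   GPR save starting at r29 that also handles LR comes out as
   "_savegpr_29_x", while the corresponding ELFv2 routine is
   "_savegpr0_29".  */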
28965 if (TARGET_SPE)
28967 /* No floating point saves on the SPE. */
28968 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28970 if ((sel & SAVRES_SAVE))
28971 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28972 else
28973 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28975 if ((sel & SAVRES_LR))
28976 suffix = "_x";
28978 else if (DEFAULT_ABI == ABI_V4)
28980 if (TARGET_64BIT)
28981 goto aix_names;
28983 if ((sel & SAVRES_REG) == SAVRES_GPR)
28984 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28985 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28986 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28987 else if ((sel & SAVRES_REG) == SAVRES_VR)
28988 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28989 else
28990 abort ();
28992 if ((sel & SAVRES_LR))
28993 suffix = "_x";
28995 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28997 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28998 /* No out-of-line save/restore routines for GPRs on AIX. */
28999 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
29000 #endif
29002 aix_names:
29003 if ((sel & SAVRES_REG) == SAVRES_GPR)
29004 prefix = ((sel & SAVRES_SAVE)
29005 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
29006 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
29007 else if ((sel & SAVRES_REG) == SAVRES_FPR)
29009 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29010 if ((sel & SAVRES_LR))
29011 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
29012 else
29013 #endif
29015 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
29016 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
29019 else if ((sel & SAVRES_REG) == SAVRES_VR)
29020 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
29021 else
29022 abort ();
29025 if (DEFAULT_ABI == ABI_DARWIN)
29027 /* The Darwin approach is (slightly) different, in order to be
29028 compatible with code generated by the system toolchain. There is a
29029 single symbol for the start of save sequence, and the code here
29030 embeds an offset into that code on the basis of the first register
29031 to be saved. */
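/* For instance, a save starting at r21 produces the label
   "*saveGPR+32 ; save r21-r31"; the "%.0d" below prints nothing
   when the offset is zero, i.e. when saving from r13.  */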
29032 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
29033 if ((sel & SAVRES_REG) == SAVRES_GPR)
29034 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
29035 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
29036 (regno - 13) * 4, prefix, regno);
29037 else if ((sel & SAVRES_REG) == SAVRES_FPR)
29038 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
29039 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
29040 else if ((sel & SAVRES_REG) == SAVRES_VR)
29041 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
29042 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
29043 else
29044 abort ();
29046 else
29047 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
29049 return savres_routine_name;
/* Return an RTL SYMBOL_REF for an out-of-line register save/restore
   routine.  SEL encodes the register class, save vs. restore and LR
   handling, as for rs6000_savres_routine_name.  */
29055 static rtx
29056 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
29058 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
29059 ? info->first_gp_reg_save
29060 : (sel & SAVRES_REG) == SAVRES_FPR
29061 ? info->first_fp_reg_save - 32
29062 : (sel & SAVRES_REG) == SAVRES_VR
29063 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
29064 : -1);
29065 rtx sym;
29066 int select = sel;
29068 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
29069 versions of the gpr routines. */
29070 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
29071 && info->spe_64bit_regs_used)
29072 select ^= SAVRES_FPR ^ SAVRES_GPR;
29074 /* Don't generate bogus routine names. */
29075 gcc_assert (FIRST_SAVRES_REGISTER <= regno
29076 && regno <= LAST_SAVRES_REGISTER
&& select >= 0 && select < 12);
29079 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
29081 if (sym == NULL)
29083 char *name;
29085 name = rs6000_savres_routine_name (info, regno, sel);
29087 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
29088 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
29089 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
29092 return sym;
29095 /* Emit a sequence of insns, including a stack tie if needed, for
29096 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
29097 reset the stack pointer, but move the base of the frame into
29098 reg UPDT_REGNO for use by out-of-line register restore routines. */
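/* Returns the insn emitted, or NULL_RTX if no insn was needed.  */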
29100 static rtx
29101 rs6000_emit_stack_reset (rs6000_stack_t *info,
29102 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
29103 unsigned updt_regno)
/* Nothing to do if the frame base is already in UPDT_REGNO and
   there is no offset to apply.  */
29106 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
29107 return NULL_RTX;
29109 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
29111 /* This blockage is needed so that sched doesn't decide to move
29112 the sp change before the register restores. */
29113 if (DEFAULT_ABI == ABI_V4
29114 || (TARGET_SPE_ABI
29115 && info->spe_64bit_regs_used != 0
29116 && info->first_gp_reg_save != 32))
29117 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
29118 GEN_INT (frame_off)));
29120 /* If we are restoring registers out-of-line, we will be using the
29121 "exit" variants of the restore routines, which will reset the
29122 stack for us. But we do need to point updt_reg into the
29123 right place for those routines. */
29124 if (frame_off != 0)
29125 return emit_insn (gen_add3_insn (updt_reg_rtx,
29126 frame_reg_rtx, GEN_INT (frame_off)));
29127 else
29128 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
29130 return NULL_RTX;
29133 /* Return the register number used as a pointer by out-of-line
29134 save/restore functions. */
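/* Concretely: on AIX/ELFv2 this is r1 for FPR routines or any LR
   variant, r12 otherwise; on Darwin, r1 for FPR routines; r11 in
   all remaining cases.  */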
29136 static inline unsigned
29137 ptr_regno_for_savres (int sel)
29139 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29140 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
29141 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
29144 /* Construct a parallel rtx describing the effect of a call to an
29145 out-of-line register save/restore routine, and emit the insn
29146 or jump_insn as appropriate. */
29148 static rtx_insn *
29149 rs6000_emit_savres_rtx (rs6000_stack_t *info,
29150 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
29151 machine_mode reg_mode, int sel)
29153 int i;
29154 int offset, start_reg, end_reg, n_regs, use_reg;
29155 int reg_size = GET_MODE_SIZE (reg_mode);
29156 rtx sym;
29157 rtvec p;
29158 rtx par;
29159 rtx_insn *insn;
29161 offset = 0;
29162 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29163 ? info->first_gp_reg_save
29164 : (sel & SAVRES_REG) == SAVRES_FPR
29165 ? info->first_fp_reg_save
29166 : (sel & SAVRES_REG) == SAVRES_VR
29167 ? info->first_altivec_reg_save
29168 : -1);
29169 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29170 ? 32
29171 : (sel & SAVRES_REG) == SAVRES_FPR
29172 ? 64
29173 : (sel & SAVRES_REG) == SAVRES_VR
29174 ? LAST_ALTIVEC_REGNO + 1
29175 : -1);
29176 n_regs = end_reg - start_reg;
29177 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
29178 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
29179 + n_regs);
29181 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29182 RTVEC_ELT (p, offset++) = ret_rtx;
29184 RTVEC_ELT (p, offset++)
29185 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29187 sym = rs6000_savres_routine_sym (info, sel);
29188 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
29190 use_reg = ptr_regno_for_savres (sel);
29191 if ((sel & SAVRES_REG) == SAVRES_VR)
29193 /* Vector regs are saved/restored using [reg+reg] addressing. */
29194 RTVEC_ELT (p, offset++)
29195 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29196 RTVEC_ELT (p, offset++)
29197 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
29199 else
29200 RTVEC_ELT (p, offset++)
29201 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29203 for (i = 0; i < end_reg - start_reg; i++)
29204 RTVEC_ELT (p, i + offset)
29205 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
29206 frame_reg_rtx, save_area_offset + reg_size * i,
29207 (sel & SAVRES_SAVE) != 0);
29209 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29210 RTVEC_ELT (p, i + offset)
29211 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
29213 par = gen_rtx_PARALLEL (VOIDmode, p);
29215 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29217 insn = emit_jump_insn (par);
29218 JUMP_LABEL (insn) = ret_rtx;
29220 else
29221 insn = emit_insn (par);
29222 return insn;
29225 /* Emit code to store CR fields that need to be saved into REG. */
29227 static void
29228 rs6000_emit_move_from_cr (rtx reg)
/* Only the ELFv2 ABI permits storing just the CR fields that
   actually need saving.  */
29231 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
29233 int i, cr_reg[8], count = 0;
29235 /* Collect CR fields that must be saved. */
29236 for (i = 0; i < 8; i++)
29237 if (save_reg_p (CR0_REGNO + i))
29238 cr_reg[count++] = i;
29240 /* If it's just a single one, use mfcrf. */
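/* The FXM operand of mfcrf is a one-hot mask whose bit (7 - N)
   selects CR field N; e.g. 1 << 5 == 0x20 selects CR2.  */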
29241 if (count == 1)
29243 rtvec p = rtvec_alloc (1);
29244 rtvec r = rtvec_alloc (2);
29245 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
29246 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
29247 RTVEC_ELT (p, 0)
29248 = gen_rtx_SET (reg,
29249 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
29251 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29252 return;
/* ??? It might be better to handle the count == 2 and count == 3
   cases here as well, using logical operations to combine the
   values.  */
29259 emit_insn (gen_movesi_from_cr (reg));
29262 /* Return whether the split-stack arg pointer (r12) is used. */
29264 static bool
29265 split_stack_arg_pointer_used_p (void)
29267 /* If the pseudo holding the arg pointer is no longer a pseudo,
29268 then the arg pointer is used. */
29269 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
29270 && (!REG_P (cfun->machine->split_stack_arg_pointer)
29271 || (REGNO (cfun->machine->split_stack_arg_pointer)
29272 < FIRST_PSEUDO_REGISTER)))
29273 return true;
29275 /* Unfortunately we also need to do some code scanning, since
29276 r12 may have been substituted for the pseudo. */
29277 rtx_insn *insn;
29278 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
29279 FOR_BB_INSNS (bb, insn)
29280 if (NONDEBUG_INSN_P (insn))
29282 /* A call destroys r12. */
29283 if (CALL_P (insn))
29284 return false;
29286 df_ref use;
29287 FOR_EACH_INSN_USE (use, insn)
29289 rtx x = DF_REF_REG (use);
29290 if (REG_P (x) && REGNO (x) == 12)
29291 return true;
29293 df_ref def;
29294 FOR_EACH_INSN_DEF (def, insn)
29296 rtx x = DF_REF_REG (def);
29297 if (REG_P (x) && REGNO (x) == 12)
29298 return false;
29301 return bitmap_bit_p (DF_LR_OUT (bb), 12);
29304 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29306 static bool
29307 rs6000_global_entry_point_needed_p (void)
29309 /* Only needed for the ELFv2 ABI. */
29310 if (DEFAULT_ABI != ABI_ELFv2)
29311 return false;
29313 /* With -msingle-pic-base, we assume the whole program shares the same
29314 TOC, so no global entry point prologues are needed anywhere. */
29315 if (TARGET_SINGLE_PIC_BASE)
29316 return false;
29318 /* Ensure we have a global entry point for thunks. ??? We could
29319 avoid that if the target routine doesn't need a global entry point,
29320 but we do not know whether this is the case at this point. */
29321 if (cfun->is_thunk)
29322 return true;
29324 /* For regular functions, rs6000_emit_prologue sets this flag if the
29325 routine ever uses the TOC pointer. */
29326 return cfun->machine->r2_setup_needed;
29329 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29330 static sbitmap
29331 rs6000_get_separate_components (void)
29333 rs6000_stack_t *info = rs6000_stack_info ();
29335 if (WORLD_SAVE_P (info))
29336 return NULL;
29338 if (TARGET_SPE_ABI)
29339 return NULL;
29341 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
29342 && !(info->savres_strategy & REST_MULTIPLE));
29344 /* Component 0 is the save/restore of LR (done via GPR0).
29345 Components 13..31 are the save/restore of GPR13..GPR31.
29346 Components 46..63 are the save/restore of FPR14..FPR31. */
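/* (FPR n is hard register 32 + n, which is why FPR14..FPR31 map to
   components 46..63.)  */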
29348 cfun->machine->n_components = 64;
29350 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29351 bitmap_clear (components);
29353 int reg_size = TARGET_32BIT ? 4 : 8;
29354 int fp_reg_size = 8;
29356 /* The GPRs we need saved to the frame. */
29357 if ((info->savres_strategy & SAVE_INLINE_GPRS)
29358 && (info->savres_strategy & REST_INLINE_GPRS))
29360 int offset = info->gp_save_offset;
29361 if (info->push_p)
29362 offset += info->total_size;
29364 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29366 if (IN_RANGE (offset, -0x8000, 0x7fff)
29367 && rs6000_reg_live_or_pic_offset_p (regno))
29368 bitmap_set_bit (components, regno);
29370 offset += reg_size;
29374 /* Don't mess with the hard frame pointer. */
29375 if (frame_pointer_needed)
29376 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29378 /* Don't mess with the fixed TOC register. */
29379 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
29380 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
29381 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
29382 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
29384 /* The FPRs we need saved to the frame. */
29385 if ((info->savres_strategy & SAVE_INLINE_FPRS)
29386 && (info->savres_strategy & REST_INLINE_FPRS))
29388 int offset = info->fp_save_offset;
29389 if (info->push_p)
29390 offset += info->total_size;
29392 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29394 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
29395 bitmap_set_bit (components, regno);
29397 offset += fp_reg_size;
29401 /* Optimize LR save and restore if we can. This is component 0. Any
29402 out-of-line register save/restore routines need LR. */
29403 if (info->lr_save_p
29404 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
29405 && (info->savres_strategy & SAVE_INLINE_GPRS)
29406 && (info->savres_strategy & REST_INLINE_GPRS)
29407 && (info->savres_strategy & SAVE_INLINE_FPRS)
29408 && (info->savres_strategy & REST_INLINE_FPRS)
29409 && (info->savres_strategy & SAVE_INLINE_VRS)
29410 && (info->savres_strategy & REST_INLINE_VRS))
29412 int offset = info->lr_save_offset;
29413 if (info->push_p)
29414 offset += info->total_size;
29415 if (IN_RANGE (offset, -0x8000, 0x7fff))
29416 bitmap_set_bit (components, 0);
29419 return components;
29422 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29423 static sbitmap
29424 rs6000_components_for_bb (basic_block bb)
29426 rs6000_stack_t *info = rs6000_stack_info ();
29428 bitmap in = DF_LIVE_IN (bb);
29429 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29430 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29432 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29433 bitmap_clear (components);
29435 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29437 /* GPRs. */
29438 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29439 if (bitmap_bit_p (in, regno)
29440 || bitmap_bit_p (gen, regno)
29441 || bitmap_bit_p (kill, regno))
29442 bitmap_set_bit (components, regno);
29444 /* FPRs. */
29445 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29446 if (bitmap_bit_p (in, regno)
29447 || bitmap_bit_p (gen, regno)
29448 || bitmap_bit_p (kill, regno))
29449 bitmap_set_bit (components, regno);
29451 /* The link register. */
29452 if (bitmap_bit_p (in, LR_REGNO)
29453 || bitmap_bit_p (gen, LR_REGNO)
29454 || bitmap_bit_p (kill, LR_REGNO))
29455 bitmap_set_bit (components, 0);
29457 return components;
29460 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29461 static void
29462 rs6000_disqualify_components (sbitmap components, edge e,
29463 sbitmap edge_components, bool /*is_prologue*/)
29465 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29466 live where we want to place that code. */
29467 if (bitmap_bit_p (edge_components, 0)
29468 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29470 if (dump_file)
29471 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29472 "on entry to bb %d\n", e->dest->index);
29473 bitmap_clear_bit (components, 0);
29477 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29478 static void
29479 rs6000_emit_prologue_components (sbitmap components)
29481 rs6000_stack_t *info = rs6000_stack_info ();
29482 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29483 ? HARD_FRAME_POINTER_REGNUM
29484 : STACK_POINTER_REGNUM);
29486 machine_mode reg_mode = Pmode;
29487 int reg_size = TARGET_32BIT ? 4 : 8;
29488 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29489 ? DFmode : SFmode;
29490 int fp_reg_size = 8;
29492 /* Prologue for LR. */
29493 if (bitmap_bit_p (components, 0))
29495 rtx reg = gen_rtx_REG (reg_mode, 0);
29496 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
29497 RTX_FRAME_RELATED_P (insn) = 1;
29498 add_reg_note (insn, REG_CFA_REGISTER, NULL);
29500 int offset = info->lr_save_offset;
29501 if (info->push_p)
29502 offset += info->total_size;
29504 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29505 RTX_FRAME_RELATED_P (insn) = 1;
29506 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
29507 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29508 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29511 /* Prologue for the GPRs. */
29512 int offset = info->gp_save_offset;
29513 if (info->push_p)
29514 offset += info->total_size;
29516 for (int i = info->first_gp_reg_save; i < 32; i++)
29518 if (bitmap_bit_p (components, i))
29520 rtx reg = gen_rtx_REG (reg_mode, i);
29521 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29522 RTX_FRAME_RELATED_P (insn) = 1;
29523 rtx set = copy_rtx (single_set (insn));
29524 add_reg_note (insn, REG_CFA_OFFSET, set);
29527 offset += reg_size;
29530 /* Prologue for the FPRs. */
29531 offset = info->fp_save_offset;
29532 if (info->push_p)
29533 offset += info->total_size;
29535 for (int i = info->first_fp_reg_save; i < 64; i++)
29537 if (bitmap_bit_p (components, i))
29539 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29540 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29541 RTX_FRAME_RELATED_P (insn) = 1;
29542 rtx set = copy_rtx (single_set (insn));
29543 add_reg_note (insn, REG_CFA_OFFSET, set);
29546 offset += fp_reg_size;
29550 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29551 static void
29552 rs6000_emit_epilogue_components (sbitmap components)
29554 rs6000_stack_t *info = rs6000_stack_info ();
29555 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29556 ? HARD_FRAME_POINTER_REGNUM
29557 : STACK_POINTER_REGNUM);
29559 machine_mode reg_mode = Pmode;
29560 int reg_size = TARGET_32BIT ? 4 : 8;
29562 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29563 ? DFmode : SFmode;
29564 int fp_reg_size = 8;
29566 /* Epilogue for the FPRs. */
29567 int offset = info->fp_save_offset;
29568 if (info->push_p)
29569 offset += info->total_size;
29571 for (int i = info->first_fp_reg_save; i < 64; i++)
29573 if (bitmap_bit_p (components, i))
29575 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29576 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29577 RTX_FRAME_RELATED_P (insn) = 1;
29578 add_reg_note (insn, REG_CFA_RESTORE, reg);
29581 offset += fp_reg_size;
29584 /* Epilogue for the GPRs. */
29585 offset = info->gp_save_offset;
29586 if (info->push_p)
29587 offset += info->total_size;
29589 for (int i = info->first_gp_reg_save; i < 32; i++)
29591 if (bitmap_bit_p (components, i))
29593 rtx reg = gen_rtx_REG (reg_mode, i);
29594 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29595 RTX_FRAME_RELATED_P (insn) = 1;
29596 add_reg_note (insn, REG_CFA_RESTORE, reg);
29599 offset += reg_size;
29602 /* Epilogue for LR. */
29603 if (bitmap_bit_p (components, 0))
29605 int offset = info->lr_save_offset;
29606 if (info->push_p)
29607 offset += info->total_size;
29609 rtx reg = gen_rtx_REG (reg_mode, 0);
29610 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29612 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29613 insn = emit_move_insn (lr, reg);
29614 RTX_FRAME_RELATED_P (insn) = 1;
29615 add_reg_note (insn, REG_CFA_RESTORE, lr);
29619 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29620 static void
29621 rs6000_set_handled_components (sbitmap components)
29623 rs6000_stack_t *info = rs6000_stack_info ();
29625 for (int i = info->first_gp_reg_save; i < 32; i++)
29626 if (bitmap_bit_p (components, i))
29627 cfun->machine->gpr_is_wrapped_separately[i] = true;
29629 for (int i = info->first_fp_reg_save; i < 64; i++)
29630 if (bitmap_bit_p (components, i))
29631 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
29633 if (bitmap_bit_p (components, 0))
29634 cfun->machine->lr_is_wrapped_separately = true;
29637 /* Emit function prologue as insns. */
29639 void
29640 rs6000_emit_prologue (void)
29642 rs6000_stack_t *info = rs6000_stack_info ();
29643 machine_mode reg_mode = Pmode;
29644 int reg_size = TARGET_32BIT ? 4 : 8;
29645 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29646 ? DFmode : SFmode;
29647 int fp_reg_size = 8;
29648 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29649 rtx frame_reg_rtx = sp_reg_rtx;
29650 unsigned int cr_save_regno;
29651 rtx cr_save_rtx = NULL_RTX;
29652 rtx_insn *insn;
29653 int strategy;
29654 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29655 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29656 && call_used_regs[STATIC_CHAIN_REGNUM]);
29657 int using_split_stack = (flag_split_stack
29658 && (lookup_attribute ("no_split_stack",
29659 DECL_ATTRIBUTES (cfun->decl))
29660 == NULL));
29662 /* Offset to top of frame for frame_reg and sp respectively. */
29663 HOST_WIDE_INT frame_off = 0;
29664 HOST_WIDE_INT sp_off = 0;
29665 /* sp_adjust is the stack adjusting instruction, tracked so that the
29666 insn setting up the split-stack arg pointer can be emitted just
29667 prior to it, when r12 is not used here for other purposes. */
29668 rtx_insn *sp_adjust = 0;
29670 #if CHECKING_P
29671 /* Track and check usage of r0, r11, r12. */
29672 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29673 #define START_USE(R) do \
29675 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29676 reg_inuse |= 1 << (R); \
29677 } while (0)
29678 #define END_USE(R) do \
29680 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29681 reg_inuse &= ~(1 << (R)); \
29682 } while (0)
29683 #define NOT_INUSE(R) do \
29685 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29686 } while (0)
29687 #else
29688 #define START_USE(R) do {} while (0)
29689 #define END_USE(R) do {} while (0)
29690 #define NOT_INUSE(R) do {} while (0)
29691 #endif
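/* Example: START_USE (11) asserts that r11 is currently free and
   marks it in use until a matching END_USE (11); NOT_INUSE (11)
   merely asserts that it is free.  */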
29693 if (DEFAULT_ABI == ABI_ELFv2
29694 && !TARGET_SINGLE_PIC_BASE)
29696 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29698 /* With -mminimal-toc we may generate an extra use of r2 below. */
29699 if (TARGET_TOC && TARGET_MINIMAL_TOC
29700 && !constant_pool_empty_p ())
29701 cfun->machine->r2_setup_needed = true;
29705 if (flag_stack_usage_info)
29706 current_function_static_stack_size = info->total_size;
29708 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29709 || flag_stack_clash_protection)
29711 HOST_WIDE_INT size = info->total_size;
29713 if (crtl->is_leaf && !cfun->calls_alloca)
29715 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29716 rs6000_emit_probe_stack_range (get_stack_check_protect (),
29717 size - get_stack_check_protect ());
29719 else if (size > 0)
29720 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
29723 if (TARGET_FIX_AND_CONTINUE)
/* gdb on Darwin arranges to forward a function from the old
29726 address by modifying the first 5 instructions of the function
29727 to branch to the overriding function. This is necessary to
29728 permit function pointers that point to the old function to
29729 actually forward to the new function. */
29730 emit_insn (gen_nop ());
29731 emit_insn (gen_nop ());
29732 emit_insn (gen_nop ());
29733 emit_insn (gen_nop ());
29734 emit_insn (gen_nop ());
29737 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29739 reg_mode = V2SImode;
29740 reg_size = 8;
29743 /* Handle world saves specially here. */
29744 if (WORLD_SAVE_P (info))
29746 int i, j, sz;
29747 rtx treg;
29748 rtvec p;
29749 rtx reg0;
29751 /* save_world expects lr in r0. */
29752 reg0 = gen_rtx_REG (Pmode, 0);
29753 if (info->lr_save_p)
29755 insn = emit_move_insn (reg0,
29756 gen_rtx_REG (Pmode, LR_REGNO));
29757 RTX_FRAME_RELATED_P (insn) = 1;
29760 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29761 assumptions about the offsets of various bits of the stack
29762 frame. */
29763 gcc_assert (info->gp_save_offset == -220
29764 && info->fp_save_offset == -144
29765 && info->lr_save_offset == 8
29766 && info->cr_save_offset == 4
29767 && info->push_p
29768 && info->lr_save_p
29769 && (!crtl->calls_eh_return
29770 || info->ehrd_offset == -432)
29771 && info->vrsave_save_offset == -224
29772 && info->altivec_save_offset == -416);
29774 treg = gen_rtx_REG (SImode, 11);
29775 emit_move_insn (treg, GEN_INT (-info->total_size));
29777 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29778 in R11. It also clobbers R12, so beware! */
/* Preserve CR2 for save_world prologues.  */
29781 sz = 5;
29782 sz += 32 - info->first_gp_reg_save;
29783 sz += 64 - info->first_fp_reg_save;
29784 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29785 p = rtvec_alloc (sz);
29786 j = 0;
29787 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29788 gen_rtx_REG (SImode,
29789 LR_REGNO));
29790 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29791 gen_rtx_SYMBOL_REF (Pmode,
29792 "*save_world"));
29793 /* We do floats first so that the instruction pattern matches
29794 properly. */
29795 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29796 RTVEC_ELT (p, j++)
29797 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29798 ? DFmode : SFmode,
29799 info->first_fp_reg_save + i),
29800 frame_reg_rtx,
29801 info->fp_save_offset + frame_off + 8 * i);
29802 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29803 RTVEC_ELT (p, j++)
29804 = gen_frame_store (gen_rtx_REG (V4SImode,
29805 info->first_altivec_reg_save + i),
29806 frame_reg_rtx,
29807 info->altivec_save_offset + frame_off + 16 * i);
29808 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29809 RTVEC_ELT (p, j++)
29810 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29811 frame_reg_rtx,
29812 info->gp_save_offset + frame_off + reg_size * i);
29814 /* CR register traditionally saved as CR2. */
29815 RTVEC_ELT (p, j++)
29816 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29817 frame_reg_rtx, info->cr_save_offset + frame_off);
/* Explain the use of R0.  */
29819 if (info->lr_save_p)
29820 RTVEC_ELT (p, j++)
29821 = gen_frame_store (reg0,
29822 frame_reg_rtx, info->lr_save_offset + frame_off);
29823 /* Explain what happens to the stack pointer. */
29825 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29826 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29829 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29830 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29831 treg, GEN_INT (-info->total_size));
29832 sp_off = frame_off = info->total_size;
29835 strategy = info->savres_strategy;
29837 /* For V.4, update stack before we do any saving and set back pointer. */
29838 if (! WORLD_SAVE_P (info)
29839 && info->push_p
29840 && (DEFAULT_ABI == ABI_V4
29841 || crtl->calls_eh_return))
29843 bool need_r11 = (TARGET_SPE
29844 ? (!(strategy & SAVE_INLINE_GPRS)
29845 && info->spe_64bit_regs_used == 0)
29846 : (!(strategy & SAVE_INLINE_FPRS)
29847 || !(strategy & SAVE_INLINE_GPRS)
29848 || !(strategy & SAVE_INLINE_VRS)));
29849 int ptr_regno = -1;
29850 rtx ptr_reg = NULL_RTX;
29851 int ptr_off = 0;
29853 if (info->total_size < 32767)
29854 frame_off = info->total_size;
29855 else if (need_r11)
29856 ptr_regno = 11;
29857 else if (info->cr_save_p
29858 || info->lr_save_p
29859 || info->first_fp_reg_save < 64
29860 || info->first_gp_reg_save < 32
29861 || info->altivec_size != 0
29862 || info->vrsave_size != 0
29863 || crtl->calls_eh_return)
29864 ptr_regno = 12;
29865 else
29867 /* The prologue won't be saving any regs so there is no need
29868 to set up a frame register to access any frame save area.
29869 We also won't be using frame_off anywhere below, but set
29870 the correct value anyway to protect against future
29871 changes to this function. */
29872 frame_off = info->total_size;
29874 if (ptr_regno != -1)
29876 /* Set up the frame offset to that needed by the first
29877 out-of-line save function. */
29878 START_USE (ptr_regno);
29879 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29880 frame_reg_rtx = ptr_reg;
29881 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29882 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29883 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29884 ptr_off = info->gp_save_offset + info->gp_size;
29885 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29886 ptr_off = info->altivec_save_offset + info->altivec_size;
29887 frame_off = -ptr_off;
29889 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29890 ptr_reg, ptr_off);
29891 if (REGNO (frame_reg_rtx) == 12)
29892 sp_adjust = 0;
29893 sp_off = info->total_size;
29894 if (frame_reg_rtx != sp_reg_rtx)
29895 rs6000_emit_stack_tie (frame_reg_rtx, false);
/* If we need to save the link register, get it into r0 first.  */
29899 if (!WORLD_SAVE_P (info) && info->lr_save_p
29900 && !cfun->machine->lr_is_wrapped_separately)
29902 rtx addr, reg, mem;
29904 reg = gen_rtx_REG (Pmode, 0);
29905 START_USE (0);
29906 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29907 RTX_FRAME_RELATED_P (insn) = 1;
29909 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29910 | SAVE_NOINLINE_FPRS_SAVES_LR)))
29912 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29913 GEN_INT (info->lr_save_offset + frame_off));
29914 mem = gen_rtx_MEM (Pmode, addr);
29915 /* This should not be of rs6000_sr_alias_set, because of
29916 __builtin_return_address. */
29918 insn = emit_move_insn (mem, reg);
29919 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29920 NULL_RTX, NULL_RTX);
29921 END_USE (0);
/* If we need to save CR, put it into r12 or r11.  Choose r12 except
   when r12 will be needed by the out-of-line GPR save.  */
29927 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29928 && !(strategy & (SAVE_INLINE_GPRS
29929 | SAVE_NOINLINE_GPRS_SAVES_LR))
29930 ? 11 : 12);
29931 if (!WORLD_SAVE_P (info)
29932 && info->cr_save_p
29933 && REGNO (frame_reg_rtx) != cr_save_regno
29934 && !(using_static_chain_p && cr_save_regno == 11)
29935 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29937 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29938 START_USE (cr_save_regno);
29939 rs6000_emit_move_from_cr (cr_save_rtx);
/* Do any required saving of FPRs.  If there are only one or two to
   save, do it ourselves.  Otherwise, call an out-of-line save
   function.  */
29944 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29946 int offset = info->fp_save_offset + frame_off;
29947 for (int i = info->first_fp_reg_save; i < 64; i++)
29949 if (save_reg_p (i)
29950 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
29951 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
29952 sp_off - frame_off);
29954 offset += fp_reg_size;
29957 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29959 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29960 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29961 unsigned ptr_regno = ptr_regno_for_savres (sel);
29962 rtx ptr_reg = frame_reg_rtx;
29964 if (REGNO (frame_reg_rtx) == ptr_regno)
29965 gcc_checking_assert (frame_off == 0);
29966 else
29968 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29969 NOT_INUSE (ptr_regno);
29970 emit_insn (gen_add3_insn (ptr_reg,
29971 frame_reg_rtx, GEN_INT (frame_off)));
29973 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29974 info->fp_save_offset,
29975 info->lr_save_offset,
29976 DFmode, sel);
29977 rs6000_frame_related (insn, ptr_reg, sp_off,
29978 NULL_RTX, NULL_RTX);
29979 if (lr)
29980 END_USE (0);
29983 /* Save GPRs. This is done as a PARALLEL if we are using
29984 the store-multiple instructions. */
29985 if (!WORLD_SAVE_P (info)
29986 && TARGET_SPE_ABI
29987 && info->spe_64bit_regs_used != 0
29988 && info->first_gp_reg_save != 32)
29990 int i;
29991 rtx spe_save_area_ptr;
29992 HOST_WIDE_INT save_off;
29993 int ool_adjust = 0;
29995 /* Determine whether we can address all of the registers that need
29996 to be saved with an offset from frame_reg_rtx that fits in
29997 the small const field for SPE memory instructions. */
29998 int spe_regs_addressable
29999 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
30000 + reg_size * (32 - info->first_gp_reg_save - 1))
30001 && (strategy & SAVE_INLINE_GPRS));
30003 if (spe_regs_addressable)
30005 spe_save_area_ptr = frame_reg_rtx;
30006 save_off = frame_off;
30008 else
30010 /* Make r11 point to the start of the SPE save area. We need
30011 to be careful here if r11 is holding the static chain. If
30012 it is, then temporarily save it in r0. */
30013 HOST_WIDE_INT offset;
30015 if (!(strategy & SAVE_INLINE_GPRS))
30016 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
30017 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
30018 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
30019 save_off = frame_off - offset;
30021 if (using_static_chain_p)
30023 rtx r0 = gen_rtx_REG (Pmode, 0);
30025 START_USE (0);
30026 gcc_assert (info->first_gp_reg_save > 11);
30028 emit_move_insn (r0, spe_save_area_ptr);
30030 else if (REGNO (frame_reg_rtx) != 11)
30031 START_USE (11);
30033 emit_insn (gen_addsi3 (spe_save_area_ptr,
30034 frame_reg_rtx, GEN_INT (offset)));
30035 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
30036 frame_off = -info->spe_gp_save_offset + ool_adjust;
30039 if ((strategy & SAVE_INLINE_GPRS))
30041 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30042 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
30043 emit_frame_save (spe_save_area_ptr, reg_mode,
30044 info->first_gp_reg_save + i,
30045 (info->spe_gp_save_offset + save_off
30046 + reg_size * i),
30047 sp_off - save_off);
30049 else
30051 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
30052 info->spe_gp_save_offset + save_off,
30053 0, reg_mode,
30054 SAVRES_SAVE | SAVRES_GPR);
30056 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
30057 NULL_RTX, NULL_RTX);
30060 /* Move the static chain pointer back. */
30061 if (!spe_regs_addressable)
30063 if (using_static_chain_p)
30065 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
30066 END_USE (0);
30068 else if (REGNO (frame_reg_rtx) != 11)
30069 END_USE (11);
30072 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
30074 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
30075 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
30076 unsigned ptr_regno = ptr_regno_for_savres (sel);
30077 rtx ptr_reg = frame_reg_rtx;
30078 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
30079 int end_save = info->gp_save_offset + info->gp_size;
30080 int ptr_off;
30082 if (ptr_regno == 12)
30083 sp_adjust = 0;
30084 if (!ptr_set_up)
30085 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30087 /* Need to adjust r11 (r12) if we saved any FPRs. */
30088 if (end_save + frame_off != 0)
30090 rtx offset = GEN_INT (end_save + frame_off);
30092 if (ptr_set_up)
30093 frame_off = -end_save;
30094 else
30095 NOT_INUSE (ptr_regno);
30096 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30098 else if (!ptr_set_up)
30100 NOT_INUSE (ptr_regno);
30101 emit_move_insn (ptr_reg, frame_reg_rtx);
30103 ptr_off = -end_save;
30104 insn = rs6000_emit_savres_rtx (info, ptr_reg,
30105 info->gp_save_offset + ptr_off,
30106 info->lr_save_offset + ptr_off,
30107 reg_mode, sel);
30108 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
30109 NULL_RTX, NULL_RTX);
30110 if (lr)
30111 END_USE (0);
30113 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
30115 rtvec p;
30116 int i;
30117 p = rtvec_alloc (32 - info->first_gp_reg_save);
30118 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30119 RTVEC_ELT (p, i)
30120 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30121 frame_reg_rtx,
30122 info->gp_save_offset + frame_off + reg_size * i);
30123 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30124 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30125 NULL_RTX, NULL_RTX);
30127 else if (!WORLD_SAVE_P (info))
30129 int offset = info->gp_save_offset + frame_off;
30130 for (int i = info->first_gp_reg_save; i < 32; i++)
30132 if (rs6000_reg_live_or_pic_offset_p (i)
30133 && !cfun->machine->gpr_is_wrapped_separately[i])
30134 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
30135 sp_off - frame_off);
30137 offset += reg_size;
30141 if (crtl->calls_eh_return)
30143 unsigned int i;
30144 rtvec p;
30146 for (i = 0; ; ++i)
30148 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30149 if (regno == INVALID_REGNUM)
30150 break;
30153 p = rtvec_alloc (i);
30155 for (i = 0; ; ++i)
30157 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30158 if (regno == INVALID_REGNUM)
30159 break;
30161 rtx set
30162 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
30163 sp_reg_rtx,
30164 info->ehrd_offset + sp_off + reg_size * (int) i);
30165 RTVEC_ELT (p, i) = set;
30166 RTX_FRAME_RELATED_P (set) = 1;
30169 insn = emit_insn (gen_blockage ());
30170 RTX_FRAME_RELATED_P (insn) = 1;
30171 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
/* In the AIX ABI we need to make sure r2 is really saved.  */
30175 if (TARGET_AIX && crtl->calls_eh_return)
30177 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
30178 rtx join_insn, note;
30179 rtx_insn *save_insn;
30180 long toc_restore_insn;
30182 tmp_reg = gen_rtx_REG (Pmode, 11);
30183 tmp_reg_si = gen_rtx_REG (SImode, 11);
30184 if (using_static_chain_p)
30186 START_USE (0);
30187 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
30189 else
30190 START_USE (11);
30191 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
30192 /* Peek at instruction to which this function returns. If it's
30193 restoring r2, then we know we've already saved r2. We can't
30194 unconditionally save r2 because the value we have will already
30195 be updated if we arrived at this function via a plt call or
30196 toc adjusting stub. */
30197 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
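/* 0x80410000 is "lwz 2,N(1)" and 0xE8410000 is "ld 2,N(1)"; adding
   RS6000_TOC_SAVE_SLOT supplies the displacement N of the TOC save
   slot.  */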
30198 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
30199 + RS6000_TOC_SAVE_SLOT);
30200 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
30201 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
30202 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
30203 validate_condition_mode (EQ, CCUNSmode);
30204 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
30205 emit_insn (gen_rtx_SET (compare_result,
30206 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
30207 toc_save_done = gen_label_rtx ();
30208 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30209 gen_rtx_EQ (VOIDmode, compare_result,
30210 const0_rtx),
30211 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
30212 pc_rtx);
30213 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30214 JUMP_LABEL (jump) = toc_save_done;
30215 LABEL_NUSES (toc_save_done) += 1;
30217 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
30218 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
30219 sp_off - frame_off);
30221 emit_label (toc_save_done);
/* ??? If we leave SAVE_INSN marked as saving R2, then we'll have a
   CFG that has different saves along different paths.  Move the
   note to a dummy blockage insn, which describes that R2 is
   unconditionally saved after the label.  */
/* ??? An alternate representation might be a special insn pattern
   containing both the branch and the store.  That might give the
   code that minimizes the number of DW_CFA_advance opcodes more
   freedom in placing the annotations.  */
30231 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
30232 if (note)
30233 remove_note (save_insn, note);
30234 else
30235 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
30236 copy_rtx (PATTERN (save_insn)), NULL_RTX);
30237 RTX_FRAME_RELATED_P (save_insn) = 0;
30239 join_insn = emit_insn (gen_blockage ());
30240 REG_NOTES (join_insn) = note;
30241 RTX_FRAME_RELATED_P (join_insn) = 1;
30243 if (using_static_chain_p)
30245 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
30246 END_USE (0);
30248 else
30249 END_USE (11);
/* Save CR if any of the call-saved CR fields must be preserved.  */
30253 if (!WORLD_SAVE_P (info) && info->cr_save_p)
30255 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
30256 GEN_INT (info->cr_save_offset + frame_off));
30257 rtx mem = gen_frame_mem (SImode, addr);
30259 /* If we didn't copy cr before, do so now using r0. */
30260 if (cr_save_rtx == NULL_RTX)
30262 START_USE (0);
30263 cr_save_rtx = gen_rtx_REG (SImode, 0);
30264 rs6000_emit_move_from_cr (cr_save_rtx);
30267 /* Saving CR requires a two-instruction sequence: one instruction
30268 to move the CR to a general-purpose register, and a second
30269 instruction that stores the GPR to memory.
30271 We do not emit any DWARF CFI records for the first of these,
30272 because we cannot properly represent the fact that CR is saved in
30273 a register. One reason is that we cannot express that multiple
30274 CR fields are saved; another reason is that on 64-bit, the size
30275 of the CR register in DWARF (4 bytes) differs from the size of
30276 a general-purpose register.
30278 This means if any intervening instruction were to clobber one of
30279 the call-saved CR fields, we'd have incorrect CFI. To prevent
30280 this from happening, we mark the store to memory as a use of
30281 those CR fields, which prevents any such instruction from being
30282 scheduled in between the two instructions. */
30283 rtx crsave_v[9];
30284 int n_crsave = 0;
30285 int i;
30287 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
30288 for (i = 0; i < 8; i++)
30289 if (save_reg_p (CR0_REGNO + i))
30290 crsave_v[n_crsave++]
30291 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30293 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
30294 gen_rtvec_v (n_crsave, crsave_v)));
30295 END_USE (REGNO (cr_save_rtx));
30297 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30298 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30299 so we need to construct a frame expression manually. */
30300 RTX_FRAME_RELATED_P (insn) = 1;
30302 /* Update address to be stack-pointer relative, like
30303 rs6000_frame_related would do. */
30304 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
30305 GEN_INT (info->cr_save_offset + sp_off));
30306 mem = gen_frame_mem (SImode, addr);
30308 if (DEFAULT_ABI == ABI_ELFv2)
30310 /* In the ELFv2 ABI we generate separate CFI records for each
30311 CR field that was actually saved. They all point to the
30312 same 32-bit stack slot. */
30313 rtx crframe[8];
30314 int n_crframe = 0;
30316 for (i = 0; i < 8; i++)
30317 if (save_reg_p (CR0_REGNO + i))
30319 crframe[n_crframe]
30320 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
30322 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
30323 n_crframe++;
30326 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30327 gen_rtx_PARALLEL (VOIDmode,
30328 gen_rtvec_v (n_crframe, crframe)));
30330 else
30332 /* In other ABIs, by convention, we use a single CR regnum to
30333 represent the fact that all call-saved CR fields are saved.
30334 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30335 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
30336 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
30340 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30341 *separate* slots if the routine calls __builtin_eh_return, so
30342 that they can be independently restored by the unwinder. */
30343 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30345 int i, cr_off = info->ehcr_offset;
30346 rtx crsave;
30348 /* ??? We might get better performance by using multiple mfocrf
30349 instructions. */
30350 crsave = gen_rtx_REG (SImode, 0);
30351 emit_insn (gen_movesi_from_cr (crsave));
30353 for (i = 0; i < 8; i++)
30354 if (!call_used_regs[CR0_REGNO + i])
30356 rtvec p = rtvec_alloc (2);
30357 RTVEC_ELT (p, 0)
30358 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
30359 RTVEC_ELT (p, 1)
30360 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30362 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30364 RTX_FRAME_RELATED_P (insn) = 1;
30365 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30366 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
30367 sp_reg_rtx, cr_off + sp_off));
30369 cr_off += reg_size;
30373 /* Update stack and set back pointer unless this is V.4,
30374 for which it was done previously. */
30375 if (!WORLD_SAVE_P (info) && info->push_p
30376 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
30378 rtx ptr_reg = NULL;
30379 int ptr_off = 0;
30381 /* If saving altivec regs we need to be able to address all save
30382 locations using a 16-bit offset. */
30383 if ((strategy & SAVE_INLINE_VRS) == 0
30384 || (info->altivec_size != 0
30385 && (info->altivec_save_offset + info->altivec_size - 16
30386 + info->total_size - frame_off) > 32767)
30387 || (info->vrsave_size != 0
30388 && (info->vrsave_save_offset
30389 + info->total_size - frame_off) > 32767))
30391 int sel = SAVRES_SAVE | SAVRES_VR;
30392 unsigned ptr_regno = ptr_regno_for_savres (sel);
30394 if (using_static_chain_p
30395 && ptr_regno == STATIC_CHAIN_REGNUM)
30396 ptr_regno = 12;
30397 if (REGNO (frame_reg_rtx) != ptr_regno)
30398 START_USE (ptr_regno);
30399 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30400 frame_reg_rtx = ptr_reg;
30401 ptr_off = info->altivec_save_offset + info->altivec_size;
30402 frame_off = -ptr_off;
30404 else if (REGNO (frame_reg_rtx) == 1)
30405 frame_off = info->total_size;
30406 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
30407 ptr_reg, ptr_off);
30408 if (REGNO (frame_reg_rtx) == 12)
30409 sp_adjust = 0;
30410 sp_off = info->total_size;
30411 if (frame_reg_rtx != sp_reg_rtx)
30412 rs6000_emit_stack_tie (frame_reg_rtx, false);
30415 /* Set frame pointer, if needed. */
30416 if (frame_pointer_needed)
30418 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
30419 sp_reg_rtx);
30420 RTX_FRAME_RELATED_P (insn) = 1;
30423 /* Save AltiVec registers if needed. Save here because the red zone does
30424 not always include AltiVec registers. */
30425 if (!WORLD_SAVE_P (info)
30426 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
30428 int end_save = info->altivec_save_offset + info->altivec_size;
30429 int ptr_off;
30430 /* Oddly, the vector save/restore functions point r0 at the end
30431 of the save area, then use r11 or r12 to load offsets for
30432 [reg+reg] addressing. */
30433 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30434 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
30435 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30437 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30438 NOT_INUSE (0);
30439 if (scratch_regno == 12)
30440 sp_adjust = 0;
30441 if (end_save + frame_off != 0)
30443 rtx offset = GEN_INT (end_save + frame_off);
30445 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30447 else
30448 emit_move_insn (ptr_reg, frame_reg_rtx);
30450 ptr_off = -end_save;
30451 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30452 info->altivec_save_offset + ptr_off,
30453 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
30454 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
30455 NULL_RTX, NULL_RTX);
30456 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30458 /* The oddity mentioned above clobbered our frame reg. */
30459 emit_move_insn (frame_reg_rtx, ptr_reg);
30460 frame_off = ptr_off;
30463 else if (!WORLD_SAVE_P (info)
30464 && info->altivec_size != 0)
30466 int i;
30468 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30469 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30471 rtx areg, savereg, mem;
30472 HOST_WIDE_INT offset;
30474 offset = (info->altivec_save_offset + frame_off
30475 + 16 * (i - info->first_altivec_reg_save));
30477 savereg = gen_rtx_REG (V4SImode, i);
30479 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30481 mem = gen_frame_mem (V4SImode,
30482 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30483 GEN_INT (offset)));
30484 insn = emit_insn (gen_rtx_SET (mem, savereg));
30485 areg = NULL_RTX;
30487 else
30489 NOT_INUSE (0);
30490 areg = gen_rtx_REG (Pmode, 0);
30491 emit_move_insn (areg, GEN_INT (offset));
30493 /* AltiVec addressing mode is [reg+reg]. */
30494 mem = gen_frame_mem (V4SImode,
30495 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30497 /* Rather than emitting a generic move, force use of the stvx
30498 instruction, which we always want on ISA 2.07 (power8) systems.
30499 In particular we don't want xxpermdi/stxvd2x for little
30500 endian. */
30501 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30504 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30505 areg, GEN_INT (offset));
30509 /* VRSAVE is a bit vector representing which AltiVec registers
30510 are used. The OS uses this to determine which vector
30511 registers to save on a context switch. We need to save
30512 VRSAVE on the stack frame, add whatever AltiVec registers we
30513 used in this function, and do the corresponding magic in the
30514 epilogue. */
30516 if (!WORLD_SAVE_P (info)
30517 && info->vrsave_size != 0)
30519 rtx reg, vrsave;
30520 int offset;
30521 int save_regno;
/* Get VRSAVE into a GPR.  Note that ABI_V4 and ABI_DARWIN might
30524 be using r12 as frame_reg_rtx and r11 as the static chain
30525 pointer for nested functions. */
30526 save_regno = 12;
30527 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30528 && !using_static_chain_p)
30529 save_regno = 11;
30530 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30532 save_regno = 11;
30533 if (using_static_chain_p)
30534 save_regno = 0;
30537 NOT_INUSE (save_regno);
30538 reg = gen_rtx_REG (SImode, save_regno);
30539 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30540 if (TARGET_MACHO)
30541 emit_insn (gen_get_vrsave_internal (reg));
30542 else
30543 emit_insn (gen_rtx_SET (reg, vrsave));
30545 /* Save VRSAVE. */
30546 offset = info->vrsave_save_offset + frame_off;
30547 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30549 /* Include the registers in the mask. */
30550 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30552 insn = emit_insn (generate_set_vrsave (reg, info, 0));
30555 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30556 if (!TARGET_SINGLE_PIC_BASE
30557 && ((TARGET_TOC && TARGET_MINIMAL_TOC
30558 && !constant_pool_empty_p ())
30559 || (DEFAULT_ABI == ABI_V4
30560 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30561 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30563 /* If emit_load_toc_table will use the link register, we need to save
30564 it. We use R12 for this purpose because emit_load_toc_table
30565 can use register 0. This allows us to use a plain 'blr' to return
30566 from the procedure more often. */
30567 int save_LR_around_toc_setup = (TARGET_ELF
30568 && DEFAULT_ABI == ABI_V4
30569 && flag_pic
30570 && ! info->lr_save_p
30571 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30572 if (save_LR_around_toc_setup)
30574 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30575 rtx tmp = gen_rtx_REG (Pmode, 12);
30577 sp_adjust = 0;
30578 insn = emit_move_insn (tmp, lr);
30579 RTX_FRAME_RELATED_P (insn) = 1;
30581 rs6000_emit_load_toc_table (TRUE);
30583 insn = emit_move_insn (lr, tmp);
30584 add_reg_note (insn, REG_CFA_RESTORE, lr);
30585 RTX_FRAME_RELATED_P (insn) = 1;
30587 else
30588 rs6000_emit_load_toc_table (TRUE);
30591 #if TARGET_MACHO
30592 if (!TARGET_SINGLE_PIC_BASE
30593 && DEFAULT_ABI == ABI_DARWIN
30594 && flag_pic && crtl->uses_pic_offset_table)
30596 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30597 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30599 /* Save and restore LR locally around this call (in R0). */
30600 if (!info->lr_save_p)
30601 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30603 emit_insn (gen_load_macho_picbase (src));
30605 emit_move_insn (gen_rtx_REG (Pmode,
30606 RS6000_PIC_OFFSET_TABLE_REGNUM),
30607 lr);
30609 if (!info->lr_save_p)
30610 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30612 #endif
30614 /* If we need to, save the TOC register after doing the stack setup.
30615 Do not emit eh frame info for this save. The unwinder wants info,
30616 conceptually attached to instructions in this function, about
30617 register values in the caller of this function. This R2 may have
30618 already been changed from the value in the caller.
30619 We don't attempt to write accurate DWARF EH frame info for R2
30620 because code emitted by gcc for a (non-pointer) function call
30621 doesn't save and restore R2. Instead, R2 is managed out-of-line
30622 by a linker generated plt call stub when the function resides in
30623 a shared library. This behavior is costly to describe in DWARF,
30624 both in terms of the size of DWARF info and the time taken in the
30625 unwinder to interpret it. R2 changes, apart from the
30626 calls_eh_return case earlier in this function, are handled by
30627 linux-unwind.h frob_update_context. */
30628 if (rs6000_save_toc_in_prologue_p ())
30630 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30631 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30634 if (using_split_stack && split_stack_arg_pointer_used_p ())
30636 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30637 __morestack was called, it left the arg pointer to the old
30638 stack in r29. Otherwise, the arg pointer is the top of the
30639 current frame. */
30640 cfun->machine->split_stack_argp_used = true;
30641 if (sp_adjust)
30643 rtx r12 = gen_rtx_REG (Pmode, 12);
30644 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30645 emit_insn_before (set_r12, sp_adjust);
30647 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30649 rtx r12 = gen_rtx_REG (Pmode, 12);
30650 if (frame_off == 0)
30651 emit_move_insn (r12, frame_reg_rtx);
30652 else
30653 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30655 if (info->push_p)
30657 rtx r12 = gen_rtx_REG (Pmode, 12);
30658 rtx r29 = gen_rtx_REG (Pmode, 29);
30659 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30660 rtx not_more = gen_label_rtx ();
30661 rtx jump;
30663 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30664 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30665 gen_rtx_LABEL_REF (VOIDmode, not_more),
30666 pc_rtx);
30667 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30668 JUMP_LABEL (jump) = not_more;
30669 LABEL_NUSES (not_more) += 1;
30670 emit_move_insn (r12, r29);
30671 emit_label (not_more);
30676 /* Output .extern statements for the save/restore routines we use. */
30678 static void
30679 rs6000_output_savres_externs (FILE *file)
30681 rs6000_stack_t *info = rs6000_stack_info ();
30683 if (TARGET_DEBUG_STACK)
30684 debug_stack_info (info);
30686 /* Write .extern for any function we will call to save and restore
30687 fp values. */
30688 if (info->first_fp_reg_save < 64
30689 && !TARGET_MACHO
30690 && !TARGET_ELF)
30692 char *name;
30693 int regno = info->first_fp_reg_save - 32;
30695 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30697 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30698 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30699 name = rs6000_savres_routine_name (info, regno, sel);
30700 fprintf (file, "\t.extern %s\n", name);
30702 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30704 bool lr = (info->savres_strategy
30705 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30706 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30707 name = rs6000_savres_routine_name (info, regno, sel);
30708 fprintf (file, "\t.extern %s\n", name);
30713 /* Write function prologue. */
30715 static void
30716 rs6000_output_function_prologue (FILE *file)
30718 if (!cfun->is_thunk)
30719 rs6000_output_savres_externs (file);
30721 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30722 immediately after the global entry point label. */
30723 if (rs6000_global_entry_point_needed_p ())
30725 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30727 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30729 if (TARGET_CMODEL != CMODEL_LARGE)
30731 /* In the small and medium code models, we assume the TOC is less
30732 than 2 GB away from the text section, so it can be computed via the
30733 following two-instruction sequence. */
30734 char buf[256];
30736 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30737 fprintf (file, "0:\taddis 2,12,.TOC.-");
30738 assemble_name (file, buf);
30739 fprintf (file, "@ha\n");
30740 fprintf (file, "\taddi 2,2,.TOC.-");
30741 assemble_name (file, buf);
30742 fprintf (file, "@l\n");
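/* For rs6000_pic_labelno == 0 the fprintfs above produce, e.g.:

	0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l

   computing r2 from the global entry address in r12 plus the
   link-time constant offset to the TOC base.  */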
30744 else
30746 /* In the large code model, we allow arbitrary offsets between the
30747 TOC and the text section, so we have to load the offset from
30748 memory. The data field is emitted directly before the global
30749 entry point in rs6000_elf_declare_function_name. */
30750 char buf[256];
30752 #ifdef HAVE_AS_ENTRY_MARKERS
30753 /* If supported by the linker, emit a marker relocation. If the
30754 total code size of the final executable or shared library
30755 happens to fit into 2 GB after all, the linker will replace
30756 this code sequence with the sequence for the small or medium
30757 code model. */
30758 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30759 #endif
30760 fprintf (file, "\tld 2,");
30761 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30762 assemble_name (file, buf);
30763 fprintf (file, "-");
30764 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30765 assemble_name (file, buf);
30766 fprintf (file, "(12)\n");
30767 fprintf (file, "\tadd 2,2,12\n");
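/* For rs6000_pic_labelno == 0 this produces, e.g.:

	.reloc .,R_PPC64_ENTRY
	ld 2,.LCL0-.LCF0(12)
	add 2,2,12

   i.e. the TOC offset is loaded from the .LCL0 data word emitted
   just before the global entry point and added to r12.  */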
30770 fputs ("\t.localentry\t", file);
30771 assemble_name (file, name);
30772 fputs (",.-", file);
30773 assemble_name (file, name);
30774 fputs ("\n", file);
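/* The fputs sequence above emits a directive of the form

	.localentry	foo,.-foo

   recording the distance from foo's global entry point to the local
   entry point at the current location.  */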
30777 /* Output -mprofile-kernel code. This needs to be done here instead of
30778 in output_function_profile since it must go after the ELFv2 ABI
30779 local entry point. */
30780 if (TARGET_PROFILE_KERNEL && crtl->profile)
30782 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30783 gcc_assert (!TARGET_32BIT);
30785 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30787 /* In the ELFv2 ABI we have no compiler stack word. It must be
30788 the responsibility of _mcount to preserve the static chain
30789 register if required. */
30790 if (DEFAULT_ABI != ABI_ELFv2
30791 && cfun->static_chain_decl != NULL)
30793 asm_fprintf (file, "\tstd %s,24(%s)\n",
30794 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30795 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30796 asm_fprintf (file, "\tld %s,24(%s)\n",
30797 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30799 else
30800 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30803 rs6000_pic_labelno++;
30806 /* -mprofile-kernel code calls mcount before the function prolog,
30807 so a profiled leaf function should stay a leaf function. */
30808 static bool
30809 rs6000_keep_leaf_when_profiled ()
30811 return TARGET_PROFILE_KERNEL;
30814 /* Non-zero if vmx regs are restored before the frame pop, zero if
30815 we restore after the pop when possible. */
30816 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30818 /* Restoring cr is a two step process: loading a reg from the frame
30819 save, then moving the reg to cr. For ABI_V4 we must let the
30820 unwinder know that the stack location is no longer valid at or
30821 before the stack deallocation, but we can't emit a cfa_restore for
30822 cr at the stack deallocation like we do for other registers.
30823 The trouble is that it is possible for the move to cr to be
30824 scheduled after the stack deallocation. So say exactly where cr
30825 is located on each of the two insns. */
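/* A sketch of the resulting two-insn sequence (frame offset and
   mtcrf mask are illustrative):

	lwz 12,8(1)		# load_cr_save: frame slot -> GPR
	mtcrf 0xff,12		# restore_saved_cr: GPR -> CR fields

   The REG_CFA_REGISTER note on the load records that cr's value
   lives in the GPR until the move to cr executes.  */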
30827 static rtx
30828 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30830 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30831 rtx reg = gen_rtx_REG (SImode, regno);
30832 rtx_insn *insn = emit_move_insn (reg, mem);
30834 if (!exit_func && DEFAULT_ABI == ABI_V4)
30836 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30837 rtx set = gen_rtx_SET (reg, cr);
30839 add_reg_note (insn, REG_CFA_REGISTER, set);
30840 RTX_FRAME_RELATED_P (insn) = 1;
30842 return reg;
30845 /* Reload CR from REG. */
30847 static void
30848 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30850 int count = 0;
30851 int i;
30853 if (using_mfcr_multiple)
30855 for (i = 0; i < 8; i++)
30856 if (save_reg_p (CR0_REGNO + i))
30857 count++;
30858 gcc_assert (count);
30861 if (using_mfcr_multiple && count > 1)
30863 rtx_insn *insn;
30864 rtvec p;
30865 int ndx;
30867 p = rtvec_alloc (count);
30869 ndx = 0;
30870 for (i = 0; i < 8; i++)
30871 if (save_reg_p (CR0_REGNO + i))
30873 rtvec r = rtvec_alloc (2);
30874 RTVEC_ELT (r, 0) = reg;
30875 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30876 RTVEC_ELT (p, ndx) =
30877 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30878 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30879 ndx++;
30881 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30882 gcc_assert (ndx == count);
30884 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30885 CR field separately. */
30886 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30888 for (i = 0; i < 8; i++)
30889 if (save_reg_p (CR0_REGNO + i))
30890 add_reg_note (insn, REG_CFA_RESTORE,
30891 gen_rtx_REG (SImode, CR0_REGNO + i));
30893 RTX_FRAME_RELATED_P (insn) = 1;
30896 else
30897 for (i = 0; i < 8; i++)
30898 if (save_reg_p (CR0_REGNO + i))
30900 rtx insn = emit_insn (gen_movsi_to_cr_one
30901 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30903 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30904 CR field separately, attached to the insn that in fact
30905 restores this particular CR field. */
30906 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30908 add_reg_note (insn, REG_CFA_RESTORE,
30909 gen_rtx_REG (SImode, CR0_REGNO + i));
30911 RTX_FRAME_RELATED_P (insn) = 1;
30915 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30916 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30917 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30919 rtx_insn *insn = get_last_insn ();
30920 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30922 add_reg_note (insn, REG_CFA_RESTORE, cr);
30923 RTX_FRAME_RELATED_P (insn) = 1;
30927 /* Like cr, the move to lr instruction can be scheduled after the
30928 stack deallocation, but unlike cr, its stack frame save is still
30929 valid. So we only need to emit the cfa_restore on the correct
30930 instruction. */
30932 static void
30933 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30935 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30936 rtx reg = gen_rtx_REG (Pmode, regno);
30938 emit_move_insn (reg, mem);
30941 static void
30942 restore_saved_lr (int regno, bool exit_func)
30944 rtx reg = gen_rtx_REG (Pmode, regno);
30945 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30946 rtx_insn *insn = emit_move_insn (lr, reg);
30948 if (!exit_func && flag_shrink_wrap)
30950 add_reg_note (insn, REG_CFA_RESTORE, lr);
30951 RTX_FRAME_RELATED_P (insn) = 1;
30955 static rtx
30956 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30958 if (DEFAULT_ABI == ABI_ELFv2)
30960 int i;
30961 for (i = 0; i < 8; i++)
30962 if (save_reg_p (CR0_REGNO + i))
30964 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30965 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30966 cfa_restores);
30969 else if (info->cr_save_p)
30970 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30971 gen_rtx_REG (SImode, CR2_REGNO),
30972 cfa_restores);
30974 if (info->lr_save_p)
30975 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30976 gen_rtx_REG (Pmode, LR_REGNO),
30977 cfa_restores);
30978 return cfa_restores;
30981 /* Return true if OFFSET from stack pointer can be clobbered by signals.
30982 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
30983 bytes below the stack pointer that are not clobbered by signals. */
30985 static inline bool
30986 offset_below_red_zone_p (HOST_WIDE_INT offset)
30988 return offset < (DEFAULT_ABI == ABI_V4
30989 ? 0
30990 : TARGET_32BIT ? -220 : -288);
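/* Worked example: under the 64-bit AIX/ELF ABIs the limit is -288,
   so an offset of -288 is still inside the red zone (not below it),
   while -289 and anything lower may be clobbered by a signal
   handler once the stack pointer has been popped.  For V.4 every
   negative offset qualifies, since there is no red zone at all.  */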
30993 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30995 static void
30996 emit_cfa_restores (rtx cfa_restores)
30998 rtx_insn *insn = get_last_insn ();
30999 rtx *loc = &REG_NOTES (insn);
31001 while (*loc)
31002 loc = &XEXP (*loc, 1);
31003 *loc = cfa_restores;
31004 RTX_FRAME_RELATED_P (insn) = 1;
31007 /* Emit function epilogue as insns. */
31009 void
31010 rs6000_emit_epilogue (int sibcall)
31012 rs6000_stack_t *info;
31013 int restoring_GPRs_inline;
31014 int restoring_FPRs_inline;
31015 int using_load_multiple;
31016 int using_mtcr_multiple;
31017 int use_backchain_to_restore_sp;
31018 int restore_lr;
31019 int strategy;
31020 HOST_WIDE_INT frame_off = 0;
31021 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
31022 rtx frame_reg_rtx = sp_reg_rtx;
31023 rtx cfa_restores = NULL_RTX;
31024 rtx insn;
31025 rtx cr_save_reg = NULL_RTX;
31026 machine_mode reg_mode = Pmode;
31027 int reg_size = TARGET_32BIT ? 4 : 8;
31028 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
31029 ? DFmode : SFmode;
31030 int fp_reg_size = 8;
31031 int i;
31032 bool exit_func;
31033 unsigned ptr_regno;
31035 info = rs6000_stack_info ();
31037 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
31039 reg_mode = V2SImode;
31040 reg_size = 8;
31043 strategy = info->savres_strategy;
31044 using_load_multiple = strategy & REST_MULTIPLE;
31045 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
31046 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
31047 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
31048 || rs6000_cpu == PROCESSOR_PPC603
31049 || rs6000_cpu == PROCESSOR_PPC750
31050 || optimize_size);
31051 /* Restore via the backchain when we have a large frame, since this
31052 is more efficient than an addis, addi pair. The second condition
31053 here will not trigger at the moment; we don't actually need a
31054 frame pointer for alloca, but the generic parts of the compiler
31055 give us one anyway. */
31056 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
31057 ? info->lr_save_offset
31058 : 0) > 32767
31059 || (cfun->calls_alloca
31060 && !frame_pointer_needed));
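/* The backchain word at 0(r1) holds the caller's stack pointer, so
   restoring via the backchain is a single load, e.g.:

	ld 1,0(1)

   whereas reconstructing a large frame size needs an addis/addi
   pair before the add to r1.  */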
31061 restore_lr = (info->lr_save_p
31062 && (restoring_FPRs_inline
31063 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
31064 && (restoring_GPRs_inline
31065 || info->first_fp_reg_save < 64)
31066 && !cfun->machine->lr_is_wrapped_separately);
31069 if (WORLD_SAVE_P (info))
31071 int i, j;
31072 char rname[30];
31073 const char *alloc_rname;
31074 rtvec p;
31076 /* eh_rest_world_r10 will return to the location saved in the LR
31077 stack slot (which is not likely to be our caller.)
31078 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
31079 rest_world is similar, except any R10 parameter is ignored.
31080 The exception-handling stuff that was here in 2.95 is no
31081 longer necessary. */
31083 p = rtvec_alloc (9
31084 + 32 - info->first_gp_reg_save
31085 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
31086 + 63 + 1 - info->first_fp_reg_save);
31088 strcpy (rname, ((crtl->calls_eh_return) ?
31089 "*eh_rest_world_r10" : "*rest_world"));
31090 alloc_rname = ggc_strdup (rname);
31092 j = 0;
31093 RTVEC_ELT (p, j++) = ret_rtx;
31094 RTVEC_ELT (p, j++)
31095 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
31096 /* The instruction pattern requires a clobber here;
31097 it is shared with the restVEC helper. */
31098 RTVEC_ELT (p, j++)
31099 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
31102 /* CR register traditionally saved as CR2. */
31103 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
31104 RTVEC_ELT (p, j++)
31105 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
31106 if (flag_shrink_wrap)
31108 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
31109 gen_rtx_REG (Pmode, LR_REGNO),
31110 cfa_restores);
31111 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31115 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31117 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31118 RTVEC_ELT (p, j++)
31119 = gen_frame_load (reg,
31120 frame_reg_rtx, info->gp_save_offset + reg_size * i);
31121 if (flag_shrink_wrap)
31122 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31124 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
31126 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
31127 RTVEC_ELT (p, j++)
31128 = gen_frame_load (reg,
31129 frame_reg_rtx, info->altivec_save_offset + 16 * i);
31130 if (flag_shrink_wrap)
31131 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31133 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
31135 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31136 ? DFmode : SFmode),
31137 info->first_fp_reg_save + i);
31138 RTVEC_ELT (p, j++)
31139 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
31140 if (flag_shrink_wrap)
31141 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31143 RTVEC_ELT (p, j++)
31144 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
31145 RTVEC_ELT (p, j++)
31146 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
31147 RTVEC_ELT (p, j++)
31148 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
31149 RTVEC_ELT (p, j++)
31150 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
31151 RTVEC_ELT (p, j++)
31152 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
31153 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31155 if (flag_shrink_wrap)
31157 REG_NOTES (insn) = cfa_restores;
31158 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31159 RTX_FRAME_RELATED_P (insn) = 1;
31161 return;
31164 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31165 if (info->push_p)
31166 frame_off = info->total_size;
31168 /* Restore AltiVec registers if we must do so before adjusting the
31169 stack. */
31170 if (info->altivec_size != 0
31171 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31172 || (DEFAULT_ABI != ABI_V4
31173 && offset_below_red_zone_p (info->altivec_save_offset))))
31175 int i;
31176 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31178 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
31179 if (use_backchain_to_restore_sp)
31181 int frame_regno = 11;
31183 if ((strategy & REST_INLINE_VRS) == 0)
31185 /* Of r11 and r12, select the one not clobbered by an
31186 out-of-line restore function for the frame register. */
31187 frame_regno = 11 + 12 - scratch_regno;
31189 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
31190 emit_move_insn (frame_reg_rtx,
31191 gen_rtx_MEM (Pmode, sp_reg_rtx));
31192 frame_off = 0;
31194 else if (frame_pointer_needed)
31195 frame_reg_rtx = hard_frame_pointer_rtx;
31197 if ((strategy & REST_INLINE_VRS) == 0)
31199 int end_save = info->altivec_save_offset + info->altivec_size;
31200 int ptr_off;
31201 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31202 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31204 if (end_save + frame_off != 0)
31206 rtx offset = GEN_INT (end_save + frame_off);
31208 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31210 else
31211 emit_move_insn (ptr_reg, frame_reg_rtx);
31213 ptr_off = -end_save;
31214 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31215 info->altivec_save_offset + ptr_off,
31216 0, V4SImode, SAVRES_VR);
31218 else
31220 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31221 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31223 rtx addr, areg, mem, insn;
31224 rtx reg = gen_rtx_REG (V4SImode, i);
31225 HOST_WIDE_INT offset
31226 = (info->altivec_save_offset + frame_off
31227 + 16 * (i - info->first_altivec_reg_save));
31229 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31231 mem = gen_frame_mem (V4SImode,
31232 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31233 GEN_INT (offset)));
31234 insn = gen_rtx_SET (reg, mem);
31236 else
31238 areg = gen_rtx_REG (Pmode, 0);
31239 emit_move_insn (areg, GEN_INT (offset));
31241 /* AltiVec addressing mode is [reg+reg]. */
31242 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31243 mem = gen_frame_mem (V4SImode, addr);
31245 /* Rather than emitting a generic move, force use of the
31246 lvx instruction, which we always want. In particular we
31247 don't want lxvd2x/xxpermdi for little endian. */
31248 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31251 (void) emit_insn (insn);
31255 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31256 if (((strategy & REST_INLINE_VRS) == 0
31257 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31258 && (flag_shrink_wrap
31259 || (offset_below_red_zone_p
31260 (info->altivec_save_offset
31261 + 16 * (i - info->first_altivec_reg_save)))))
31263 rtx reg = gen_rtx_REG (V4SImode, i);
31264 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31268 /* Restore VRSAVE if we must do so before adjusting the stack. */
31269 if (info->vrsave_size != 0
31270 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31271 || (DEFAULT_ABI != ABI_V4
31272 && offset_below_red_zone_p (info->vrsave_save_offset))))
31274 rtx reg;
31276 if (frame_reg_rtx == sp_reg_rtx)
31278 if (use_backchain_to_restore_sp)
31280 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31281 emit_move_insn (frame_reg_rtx,
31282 gen_rtx_MEM (Pmode, sp_reg_rtx));
31283 frame_off = 0;
31285 else if (frame_pointer_needed)
31286 frame_reg_rtx = hard_frame_pointer_rtx;
31289 reg = gen_rtx_REG (SImode, 12);
31290 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31291 info->vrsave_save_offset + frame_off));
31293 emit_insn (generate_set_vrsave (reg, info, 1));
31296 insn = NULL_RTX;
31297 /* If we have a large stack frame, restore the old stack pointer
31298 using the backchain. */
31299 if (use_backchain_to_restore_sp)
31301 if (frame_reg_rtx == sp_reg_rtx)
31303 /* Under V.4, don't reset the stack pointer until after we're done
31304 loading the saved registers. */
31305 if (DEFAULT_ABI == ABI_V4)
31306 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31308 insn = emit_move_insn (frame_reg_rtx,
31309 gen_rtx_MEM (Pmode, sp_reg_rtx));
31310 frame_off = 0;
31312 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31313 && DEFAULT_ABI == ABI_V4)
31314 /* frame_reg_rtx has been set up by the altivec restore. */
31315 ;
31316 else
31318 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
31319 frame_reg_rtx = sp_reg_rtx;
31322 /* If we have a frame pointer, we can restore the old stack pointer
31323 from it. */
31324 else if (frame_pointer_needed)
31326 frame_reg_rtx = sp_reg_rtx;
31327 if (DEFAULT_ABI == ABI_V4)
31328 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31329 /* Prevent reordering memory accesses against stack pointer restore. */
31330 else if (cfun->calls_alloca
31331 || offset_below_red_zone_p (-info->total_size))
31332 rs6000_emit_stack_tie (frame_reg_rtx, true);
31334 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
31335 GEN_INT (info->total_size)));
31336 frame_off = 0;
31338 else if (info->push_p
31339 && DEFAULT_ABI != ABI_V4
31340 && !crtl->calls_eh_return)
31342 /* Prevent reordering memory accesses against stack pointer restore. */
31343 if (cfun->calls_alloca
31344 || offset_below_red_zone_p (-info->total_size))
31345 rs6000_emit_stack_tie (frame_reg_rtx, false);
31346 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
31347 GEN_INT (info->total_size)));
31348 frame_off = 0;
31350 if (insn && frame_reg_rtx == sp_reg_rtx)
31352 if (cfa_restores)
31354 REG_NOTES (insn) = cfa_restores;
31355 cfa_restores = NULL_RTX;
31357 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31358 RTX_FRAME_RELATED_P (insn) = 1;
31361 /* Restore AltiVec registers if we have not done so already. */
31362 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31363 && info->altivec_size != 0
31364 && (DEFAULT_ABI == ABI_V4
31365 || !offset_below_red_zone_p (info->altivec_save_offset)))
31367 int i;
31369 if ((strategy & REST_INLINE_VRS) == 0)
31371 int end_save = info->altivec_save_offset + info->altivec_size;
31372 int ptr_off;
31373 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31374 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31375 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31377 if (end_save + frame_off != 0)
31379 rtx offset = GEN_INT (end_save + frame_off);
31381 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31383 else
31384 emit_move_insn (ptr_reg, frame_reg_rtx);
31386 ptr_off = -end_save;
31387 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31388 info->altivec_save_offset + ptr_off,
31389 0, V4SImode, SAVRES_VR);
31390 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
31392 /* Frame reg was clobbered by out-of-line save. Restore it
31393 from ptr_reg, and if we are calling out-of-line gpr or
31394 fpr restore set up the correct pointer and offset. */
31395 unsigned newptr_regno = 1;
31396 if (!restoring_GPRs_inline)
31398 bool lr = info->gp_save_offset + info->gp_size == 0;
31399 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31400 newptr_regno = ptr_regno_for_savres (sel);
31401 end_save = info->gp_save_offset + info->gp_size;
31403 else if (!restoring_FPRs_inline)
31405 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
31406 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31407 newptr_regno = ptr_regno_for_savres (sel);
31408 end_save = info->fp_save_offset + info->fp_size;
31411 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
31412 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
31414 if (end_save + ptr_off != 0)
31416 rtx offset = GEN_INT (end_save + ptr_off);
31418 frame_off = -end_save;
31419 if (TARGET_32BIT)
31420 emit_insn (gen_addsi3_carry (frame_reg_rtx,
31421 ptr_reg, offset));
31422 else
31423 emit_insn (gen_adddi3_carry (frame_reg_rtx,
31424 ptr_reg, offset));
31426 else
31428 frame_off = ptr_off;
31429 emit_move_insn (frame_reg_rtx, ptr_reg);
31433 else
31435 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31436 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31438 rtx addr, areg, mem, insn;
31439 rtx reg = gen_rtx_REG (V4SImode, i);
31440 HOST_WIDE_INT offset
31441 = (info->altivec_save_offset + frame_off
31442 + 16 * (i - info->first_altivec_reg_save));
31444 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31446 mem = gen_frame_mem (V4SImode,
31447 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31448 GEN_INT (offset)));
31449 insn = gen_rtx_SET (reg, mem);
31451 else
31453 areg = gen_rtx_REG (Pmode, 0);
31454 emit_move_insn (areg, GEN_INT (offset));
31456 /* AltiVec addressing mode is [reg+reg]. */
31457 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31458 mem = gen_frame_mem (V4SImode, addr);
31460 /* Rather than emitting a generic move, force use of the
31461 lvx instruction, which we always want. In particular we
31462 don't want lxvd2x/xxpermdi for little endian. */
31463 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31466 (void) emit_insn (insn);
31470 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31471 if (((strategy & REST_INLINE_VRS) == 0
31472 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31473 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
31475 rtx reg = gen_rtx_REG (V4SImode, i);
31476 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31480 /* Restore VRSAVE if we have not done so already. */
31481 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31482 && info->vrsave_size != 0
31483 && (DEFAULT_ABI == ABI_V4
31484 || !offset_below_red_zone_p (info->vrsave_save_offset)))
31486 rtx reg;
31488 reg = gen_rtx_REG (SImode, 12);
31489 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31490 info->vrsave_save_offset + frame_off));
31492 emit_insn (generate_set_vrsave (reg, info, 1));
31495 /* If we exit by an out-of-line restore function on ABI_V4 then that
31496 function will deallocate the stack, so we don't need to worry
31497 about the unwinder restoring cr from an invalid stack frame
31498 location. */
31499 exit_func = (!restoring_FPRs_inline
31500 || (!restoring_GPRs_inline
31501 && info->first_fp_reg_save == 64));
31503 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31504 *separate* slots if the routine calls __builtin_eh_return, so
31505 that they can be independently restored by the unwinder. */
31506 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31508 int i, cr_off = info->ehcr_offset;
31510 for (i = 0; i < 8; i++)
31511 if (!call_used_regs[CR0_REGNO + i])
31513 rtx reg = gen_rtx_REG (SImode, 0);
31514 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31515 cr_off + frame_off));
31517 insn = emit_insn (gen_movsi_to_cr_one
31518 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31520 if (!exit_func && flag_shrink_wrap)
31522 add_reg_note (insn, REG_CFA_RESTORE,
31523 gen_rtx_REG (SImode, CR0_REGNO + i));
31525 RTX_FRAME_RELATED_P (insn) = 1;
31528 cr_off += reg_size;
31532 /* Get the old lr if we saved it. If we are restoring registers
31533 out-of-line, then the out-of-line routines can do this for us. */
31534 if (restore_lr && restoring_GPRs_inline)
31535 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31537 /* Get the old cr if we saved it. */
31538 if (info->cr_save_p)
31540 unsigned cr_save_regno = 12;
31542 if (!restoring_GPRs_inline)
31544 /* Ensure we don't use the register used by the out-of-line
31545 gpr register restore below. */
31546 bool lr = info->gp_save_offset + info->gp_size == 0;
31547 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31548 int gpr_ptr_regno = ptr_regno_for_savres (sel);
31550 if (gpr_ptr_regno == 12)
31551 cr_save_regno = 11;
31552 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31554 else if (REGNO (frame_reg_rtx) == 12)
31555 cr_save_regno = 11;
31557 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31558 info->cr_save_offset + frame_off,
31559 exit_func);
31562 /* Set LR here to try to overlap restores below. */
31563 if (restore_lr && restoring_GPRs_inline)
31564 restore_saved_lr (0, exit_func);
31566 /* Load exception handler data registers, if needed. */
31567 if (crtl->calls_eh_return)
31569 unsigned int i, regno;
31571 if (TARGET_AIX)
31573 rtx reg = gen_rtx_REG (reg_mode, 2);
31574 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31575 frame_off + RS6000_TOC_SAVE_SLOT));
31578 for (i = 0; ; ++i)
31580 rtx mem;
31582 regno = EH_RETURN_DATA_REGNO (i);
31583 if (regno == INVALID_REGNUM)
31584 break;
31586 /* Note: possible use of r0 here to address SPE regs. */
31587 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31588 info->ehrd_offset + frame_off
31589 + reg_size * (int) i);
31591 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31595 /* Restore GPRs. This is done as a PARALLEL if we are using
31596 the load-multiple instructions. */
31597 if (TARGET_SPE_ABI
31598 && info->spe_64bit_regs_used
31599 && info->first_gp_reg_save != 32)
31601 /* Determine whether we can address all of the registers that need
31602 to be saved with an offset from frame_reg_rtx that fits in
31603 the small const field for SPE memory instructions. */
31604 int spe_regs_addressable
31605 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31606 + reg_size * (32 - info->first_gp_reg_save - 1))
31607 && restoring_GPRs_inline);
31609 if (!spe_regs_addressable)
31611 int ool_adjust = 0;
31612 rtx old_frame_reg_rtx = frame_reg_rtx;
31613 /* Make r11 point to the start of the SPE save area. We worried about
31614 not clobbering it when we were saving registers in the prologue.
31615 There's no need to worry here because the static chain is passed
31616 anew to every function. */
31618 if (!restoring_GPRs_inline)
31619 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31620 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31621 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31622 GEN_INT (info->spe_gp_save_offset
31623 + frame_off
31624 - ool_adjust)));
31625 /* Keep the invariant that frame_reg_rtx + frame_off points
31626 at the top of the stack frame. */
31627 frame_off = -info->spe_gp_save_offset + ool_adjust;
31630 if (restoring_GPRs_inline)
31632 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31634 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31635 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31637 rtx offset, addr, mem, reg;
31639 /* We're doing all this to ensure that the immediate offset
31640 fits into the immediate field of 'evldd'. */
31641 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
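/* (SPE_CONST_OFFSET_OK checks that the offset fits evldd's
   immediate field -- a small unsigned displacement that must be a
   multiple of 8 -- hence the r11 adjustment made above when the
   save area is out of range.)  */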
31643 offset = GEN_INT (spe_offset + reg_size * i);
31644 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31645 mem = gen_rtx_MEM (V2SImode, addr);
31646 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31648 emit_move_insn (reg, mem);
31651 else
31652 rs6000_emit_savres_rtx (info, frame_reg_rtx,
31653 info->spe_gp_save_offset + frame_off,
31654 info->lr_save_offset + frame_off,
31655 reg_mode,
31656 SAVRES_GPR | SAVRES_LR);
31658 else if (!restoring_GPRs_inline)
31660 /* We are jumping to an out-of-line function. */
31661 rtx ptr_reg;
31662 int end_save = info->gp_save_offset + info->gp_size;
31663 bool can_use_exit = end_save == 0;
31664 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31665 int ptr_off;
31667 /* Emit stack reset code if we need it. */
31668 ptr_regno = ptr_regno_for_savres (sel);
31669 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31670 if (can_use_exit)
31671 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31672 else if (end_save + frame_off != 0)
31673 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31674 GEN_INT (end_save + frame_off)));
31675 else if (REGNO (frame_reg_rtx) != ptr_regno)
31676 emit_move_insn (ptr_reg, frame_reg_rtx);
31677 if (REGNO (frame_reg_rtx) == ptr_regno)
31678 frame_off = -end_save;
31680 if (can_use_exit && info->cr_save_p)
31681 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31683 ptr_off = -end_save;
31684 rs6000_emit_savres_rtx (info, ptr_reg,
31685 info->gp_save_offset + ptr_off,
31686 info->lr_save_offset + ptr_off,
31687 reg_mode, sel);
31689 else if (using_load_multiple)
31691 rtvec p;
31692 p = rtvec_alloc (32 - info->first_gp_reg_save);
31693 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31694 RTVEC_ELT (p, i)
31695 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31696 frame_reg_rtx,
31697 info->gp_save_offset + frame_off + reg_size * i);
31698 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
31700 else
31702 int offset = info->gp_save_offset + frame_off;
31703 for (i = info->first_gp_reg_save; i < 32; i++)
31705 if (rs6000_reg_live_or_pic_offset_p (i)
31706 && !cfun->machine->gpr_is_wrapped_separately[i])
31708 rtx reg = gen_rtx_REG (reg_mode, i);
31709 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31712 offset += reg_size;
31716 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31718 /* If the frame pointer was used then we can't delay emitting
31719 a REG_CFA_DEF_CFA note. This must happen on the insn that
31720 restores the frame pointer, r31. We may have already emitted
31721 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
31722 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31723 be harmless if emitted. */
31724 if (frame_pointer_needed)
31726 insn = get_last_insn ();
31727 add_reg_note (insn, REG_CFA_DEF_CFA,
31728 plus_constant (Pmode, frame_reg_rtx, frame_off));
31729 RTX_FRAME_RELATED_P (insn) = 1;
31732 /* Set up cfa_restores. We always need these when
31733 shrink-wrapping. If not shrink-wrapping then we only need
31734 the cfa_restore when the stack location is no longer valid.
31735 The cfa_restores must be emitted on or before the insn that
31736 invalidates the stack, and of course must not be emitted
31737 before the insn that actually does the restore. The latter
31738 is why it is a bad idea to emit the cfa_restores as a group
31739 on the last instruction here that actually does a restore:
31740 that insn may be reordered with respect to others doing
31741 restores. */
31742 if (flag_shrink_wrap
31743 && !restoring_GPRs_inline
31744 && info->first_fp_reg_save == 64)
31745 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31747 for (i = info->first_gp_reg_save; i < 32; i++)
31748 if (!restoring_GPRs_inline
31749 || using_load_multiple
31750 || rs6000_reg_live_or_pic_offset_p (i))
31752 if (cfun->machine->gpr_is_wrapped_separately[i])
31753 continue;
31755 rtx reg = gen_rtx_REG (reg_mode, i);
31756 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31760 if (!restoring_GPRs_inline
31761 && info->first_fp_reg_save == 64)
31763 /* We are jumping to an out-of-line function. */
31764 if (cfa_restores)
31765 emit_cfa_restores (cfa_restores);
31766 return;
31769 if (restore_lr && !restoring_GPRs_inline)
31771 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31772 restore_saved_lr (0, exit_func);
31775 /* Restore fpr's if we need to do it without calling a function. */
31776 if (restoring_FPRs_inline)
31778 int offset = info->fp_save_offset + frame_off;
31779 for (i = info->first_fp_reg_save; i < 64; i++)
31781 if (save_reg_p (i)
31782 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
31784 rtx reg = gen_rtx_REG (fp_reg_mode, i);
31785 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31786 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31787 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
31788 cfa_restores);
31791 offset += fp_reg_size;
31795 /* If we saved cr, restore it here. Just those that were used. */
31796 if (info->cr_save_p)
31797 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31799 /* If this is V.4, unwind the stack pointer after all of the loads
31800 have been done, or set up r11 if we are restoring fp out of line. */
31801 ptr_regno = 1;
31802 if (!restoring_FPRs_inline)
31804 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31805 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31806 ptr_regno = ptr_regno_for_savres (sel);
31809 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31810 if (REGNO (frame_reg_rtx) == ptr_regno)
31811 frame_off = 0;
31813 if (insn && restoring_FPRs_inline)
31815 if (cfa_restores)
31817 REG_NOTES (insn) = cfa_restores;
31818 cfa_restores = NULL_RTX;
31820 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31821 RTX_FRAME_RELATED_P (insn) = 1;
31824 if (crtl->calls_eh_return)
31826 rtx sa = EH_RETURN_STACKADJ_RTX;
31827 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31830 if (!sibcall && restoring_FPRs_inline)
31832 if (cfa_restores)
31834 /* We can't hang the cfa_restores off a simple return,
31835 since the shrink-wrap code sometimes uses an existing
31836 return. This means there might be a path from
31837 pre-prologue code to this return, and dwarf2cfi code
31838 wants the eh_frame unwinder state to be the same on
31839 all paths to any point. So we need to emit the
31840 cfa_restores before the return. For -m64 we really
31841 don't need epilogue cfa_restores at all, except for
31842 this irritating dwarf2cfi-with-shrink-wrap
31843 requirement; the stack red-zone means eh_frame info
31844 from the prologue telling the unwinder to restore
31845 from the stack is perfectly good right to the end of
31846 the function. */
31847 emit_insn (gen_blockage ());
31848 emit_cfa_restores (cfa_restores);
31849 cfa_restores = NULL_RTX;
31852 emit_jump_insn (targetm.gen_simple_return ());
31855 if (!sibcall && !restoring_FPRs_inline)
31857 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31858 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31859 int elt = 0;
31860 RTVEC_ELT (p, elt++) = ret_rtx;
31861 if (lr)
31862 RTVEC_ELT (p, elt++)
31863 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31865 /* We have to restore more than two FP registers, so branch to the
31866 restore function. It will return to our caller. */
31867 int i;
31868 int reg;
31869 rtx sym;
31871 if (flag_shrink_wrap)
31872 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31874 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31875 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31876 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
31877 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31879 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31881 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31883 RTVEC_ELT (p, elt++)
31884 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31885 if (flag_shrink_wrap)
31886 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31889 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31892 if (cfa_restores)
31894 if (sibcall)
31895 /* Ensure the cfa_restores are hung off an insn that won't
31896 be reordered above other restores. */
31897 emit_insn (gen_blockage ());
31899 emit_cfa_restores (cfa_restores);
31903 /* Write function epilogue. */
31905 static void
31906 rs6000_output_function_epilogue (FILE *file)
31908 #if TARGET_MACHO
31909 macho_branch_islands ();
31912 rtx_insn *insn = get_last_insn ();
31913 rtx_insn *deleted_debug_label = NULL;
31915 /* Mach-O doesn't support labels at the end of objects, so if
31916 it looks like we might want one, take special action.
31918 First, collect any sequence of deleted debug labels. */
31919 while (insn
31920 && NOTE_P (insn)
31921 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31923 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL notes;
31924 instead set their CODE_LABEL_NUMBER to -1, since otherwise
31925 there would be code generation differences
31926 between -g and -g0. */
31927 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31928 deleted_debug_label = insn;
31929 insn = PREV_INSN (insn);
31932 /* Second, if we have:
31933 label:
31934 barrier
31935 then this needs to be detected, so skip past the barrier. */
31937 if (insn && BARRIER_P (insn))
31938 insn = PREV_INSN (insn);
31940 /* Up to now we've only seen notes or barriers. */
31941 if (insn)
31943 if (LABEL_P (insn)
31944 || (NOTE_P (insn)
31945 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31946 /* Trailing label: <barrier>. */
31947 fputs ("\tnop\n", file);
31948 else
31950 /* Lastly, see if we have a completely empty function body. */
31951 while (insn && ! INSN_P (insn))
31952 insn = PREV_INSN (insn);
31953 /* If we don't find any insns, we've got an empty function body;
31954 i.e. completely empty, without a return or branch. This is
31955 taken as the case where a function body has been removed
31956 because it contains an inline __builtin_unreachable(). GCC
31957 states that reaching __builtin_unreachable() means UB so we're
31958 not obliged to do anything special; however, we want
31959 non-zero-sized function bodies. To meet this, and help the
31960 user out, let's trap the case. */
31961 if (insn == NULL)
31962 fputs ("\ttrap\n", file);
31965 else if (deleted_debug_label)
31966 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31967 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31968 CODE_LABEL_NUMBER (insn) = -1;
31970 #endif
31972 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31973 on its format.
31975 We don't output a traceback table if -finhibit-size-directive was
31976 used. The documentation for -finhibit-size-directive reads
31977 ``don't output a @code{.size} assembler directive, or anything
31978 else that would cause trouble if the function is split in the
31979 middle, and the two halves are placed at locations far apart in
31980 memory.'' The traceback table has this property, since it
31981 includes the offset from the start of the function to the
31982 traceback table itself.
31984 System V.4 PowerPC (and the embedded ABI derived from it) uses a
31985 different traceback table. */
31986 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31987 && ! flag_inhibit_size_directive
31988 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31990 const char *fname = NULL;
31991 const char *language_string = lang_hooks.name;
31992 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31993 int i;
31994 int optional_tbtab;
31995 rs6000_stack_t *info = rs6000_stack_info ();
31997 if (rs6000_traceback == traceback_full)
31998 optional_tbtab = 1;
31999 else if (rs6000_traceback == traceback_part)
32000 optional_tbtab = 0;
32001 else
32002 optional_tbtab = !optimize_size && !TARGET_ELF;
32004 if (optional_tbtab)
32006 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
32007 while (*fname == '.') /* V.4 encodes . in the name */
32008 fname++;
32010 /* Need label immediately before tbtab, so we can compute
32011 its offset from the function start. */
32012 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32013 ASM_OUTPUT_LABEL (file, fname);
32016 /* The .tbtab pseudo-op can only be used for the first eight
32017 expressions, since it can't handle the possibly variable
32018 length fields that follow. However, if you omit the optional
32019 fields, the assembler outputs zeros for all optional fields
32020 anyway, giving each variable length field its minimum length
32021 (as defined in sys/debug.h). Thus we cannot use the .tbtab
32022 pseudo-op at all. */
32024 /* An all-zero word flags the start of the tbtab, for debuggers
32025 that have to find it by searching forward from the entry
32026 point or from the current pc. */
32027 fputs ("\t.long 0\n", file);
32029 /* Tbtab format type. Use format type 0. */
32030 fputs ("\t.byte 0,", file);
32032 /* Language type. Unfortunately, there does not seem to be any
32033 official way to discover the language being compiled, so we
32034 use language_string.
32035 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
32036 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
32037 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
32038 either, so for now use 0. */
32039 if (lang_GNU_C ()
32040 || ! strcmp (language_string, "GNU GIMPLE")
32041 || ! strcmp (language_string, "GNU Go")
32042 || ! strcmp (language_string, "libgccjit"))
32043 i = 0;
32044 else if (! strcmp (language_string, "GNU F77")
32045 || lang_GNU_Fortran ())
32046 i = 1;
32047 else if (! strcmp (language_string, "GNU Pascal"))
32048 i = 2;
32049 else if (! strcmp (language_string, "GNU Ada"))
32050 i = 3;
32051 else if (lang_GNU_CXX ()
32052 || ! strcmp (language_string, "GNU Objective-C++"))
32053 i = 9;
32054 else if (! strcmp (language_string, "GNU Java"))
32055 i = 13;
32056 else if (! strcmp (language_string, "GNU Objective-C"))
32057 i = 14;
32058 else
32059 gcc_unreachable ();
32060 fprintf (file, "%d,", i);
32062 /* 8 single bit fields: global linkage (not set for C extern linkage,
32063 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
32064 from start of procedure stored in tbtab, internal function, function
32065 has controlled storage, function has no toc, function uses fp,
32066 function logs/aborts fp operations. */
32067 /* Assume that fp operations are used if any fp reg must be saved. */
32068 fprintf (file, "%d,",
32069 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
32071 /* 6 bitfields: function is interrupt handler, name present in
32072 proc table, function calls alloca, on condition directives
32073 (controls stack walks, 3 bits), saves condition reg, saves
32074 link reg. */
32075 /* The `function calls alloca' bit seems to be set whenever reg 31 is
32076 set up as a frame pointer, even when there is no alloca call. */
32077 fprintf (file, "%d,",
32078 ((optional_tbtab << 6)
32079 | ((optional_tbtab & frame_pointer_needed) << 5)
32080 | (info->cr_save_p << 1)
32081 | (info->lr_save_p)));
32083 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
32084 (6 bits). */
32085 fprintf (file, "%d,",
32086 (info->push_p << 7) | (64 - info->first_fp_reg_save));
32088 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
32089 fprintf (file, "%d,", (32 - first_reg_to_save ()));
32091 if (optional_tbtab)
32093 /* Compute the parameter info from the function decl argument
32094 list. */
32095 tree decl;
32096 int next_parm_info_bit = 31;
32098 for (decl = DECL_ARGUMENTS (current_function_decl);
32099 decl; decl = DECL_CHAIN (decl))
32101 rtx parameter = DECL_INCOMING_RTL (decl);
32102 machine_mode mode = GET_MODE (parameter);
32104 if (GET_CODE (parameter) == REG)
32106 if (SCALAR_FLOAT_MODE_P (mode))
32108 int bits;
32110 float_parms++;
32112 switch (mode)
32114 case E_SFmode:
32115 case E_SDmode:
32116 bits = 0x2;
32117 break;
32119 case E_DFmode:
32120 case E_DDmode:
32121 case E_TFmode:
32122 case E_TDmode:
32123 case E_IFmode:
32124 case E_KFmode:
32125 bits = 0x3;
32126 break;
32128 default:
32129 gcc_unreachable ();
32132 /* If only one bit will fit, don't or in this entry. */
32133 if (next_parm_info_bit > 0)
32134 parm_info |= (bits << (next_parm_info_bit - 1));
32135 next_parm_info_bit -= 2;
32137 else
32139 fixed_parms += ((GET_MODE_SIZE (mode)
32140 + (UNITS_PER_WORD - 1))
32141 / UNITS_PER_WORD);
32142 next_parm_info_bit -= 1;
32148 /* Number of fixed point parameters. */
32149 /* This is actually the number of words of fixed point parameters; thus
32150 an 8 byte struct counts as 2; and thus the maximum value is 8. */
32151 fprintf (file, "%d,", fixed_parms);
32153 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32154 all on stack. */
32155 /* This is actually the number of fp registers that hold parameters;
32156 and thus the maximum value is 13. */
32157 /* Set parameters on stack bit if parameters are not in their original
32158 registers, regardless of whether they are on the stack? Xlc
32159 seems to set the bit when not optimizing. */
32160 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
32162 if (optional_tbtab)
32164 /* Optional fields follow. Some are variable length. */
32166 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32167 float, 11 double float. */
32168 /* There is an entry for each parameter in a register, in the order
32169 that they occur in the parameter list. Any intervening arguments
32170 on the stack are ignored. If the list overflows a long (max
32171 possible length 34 bits) then completely leave off all elements
32172 that don't fit. */
32173 /* Only emit this long if there was at least one parameter. */
32174 if (fixed_parms || float_parms)
32175 fprintf (file, "\t.long %d\n", parm_info);
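/* Worked example (illustrative): for f (int i, double d) the int
   consumes a single 0 bit at bit 31 and the double then sets bits
   30-29 to 11, so parm_info == 0x60000000 and the directive is

	.long 1610612736

   with fixed_parms == 1 and float_parms == 1 emitted above.  */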
32177 /* Offset from start of code to tb table. */
32178 fputs ("\t.long ", file);
32179 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32180 RS6000_OUTPUT_BASENAME (file, fname);
32181 putc ('-', file);
32182 rs6000_output_function_entry (file, fname);
32183 putc ('\n', file);
32185 /* Interrupt handler mask. */
32186 /* Omit this long, since we never set the interrupt handler bit
32187 above. */
32189 /* Number of CTL (controlled storage) anchors. */
32190 /* Omit this long, since the has_ctl bit is never set above. */
32192 /* Displacement into stack of each CTL anchor. */
32193 /* Omit this list of longs, because there are no CTL anchors. */
32195 /* Length of function name. */
32196 if (*fname == '*')
32197 ++fname;
32198 fprintf (file, "\t.short %d\n", (int) strlen (fname));
32200 /* Function name. */
32201 assemble_string (fname, strlen (fname));
32203 /* Register for alloca automatic storage; this is always reg 31.
32204 Only emit this if the alloca bit was set above. */
32205 if (frame_pointer_needed)
32206 fputs ("\t.byte 31\n", file);
32208 fputs ("\t.align 2\n", file);
32212 /* Arrange to define .LCTOC1 label, if not already done. */
32213 if (need_toc_init)
32215 need_toc_init = 0;
32216 if (!toc_initialized)
32218 switch_to_section (toc_section);
32219 switch_to_section (current_function_section ());
32224 /* -fsplit-stack support. */
32226 /* A SYMBOL_REF for __morestack. */
32227 static GTY(()) rtx morestack_ref;
32229 static rtx
32230 gen_add3_const (rtx rt, rtx ra, long c)
32232 if (TARGET_64BIT)
32233 return gen_adddi3 (rt, ra, GEN_INT (c));
32234 else
32235 return gen_addsi3 (rt, ra, GEN_INT (c));
32238 /* Emit -fsplit-stack prologue, which goes before the regular function
32239 prologue (at local entry point in the case of ELFv2). */
32241 void
32242 rs6000_expand_split_stack_prologue (void)
32244 rs6000_stack_t *info = rs6000_stack_info ();
32245 unsigned HOST_WIDE_INT allocate;
32246 long alloc_hi, alloc_lo;
32247 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
32248 rtx_insn *insn;
32250 gcc_assert (flag_split_stack && reload_completed);
32252 if (!info->push_p)
32253 return;
32255 if (global_regs[29])
32257 error ("-fsplit-stack uses register r29");
32258 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
32259 "conflicts with %qD", global_regs_decl[29]);
32262 allocate = info->total_size;
32263 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
32265 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32266 return;
32268 if (morestack_ref == NULL_RTX)
32270 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
32271 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
32272 | SYMBOL_FLAG_FUNCTION);
32275 r0 = gen_rtx_REG (Pmode, 0);
32276 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32277 r12 = gen_rtx_REG (Pmode, 12);
32278 emit_insn (gen_load_split_stack_limit (r0));
32279 /* Always emit two insns here to calculate the requested stack,
32280 so that the linker can edit them when adjusting size for calling
32281 non-split-stack code. */
32282 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
32283 alloc_lo = -allocate - alloc_hi;
32284 if (alloc_hi != 0)
32286 emit_insn (gen_add3_const (r12, r1, alloc_hi));
32287 if (alloc_lo != 0)
32288 emit_insn (gen_add3_const (r12, r12, alloc_lo));
32289 else
32290 emit_insn (gen_nop ());
32292 else
32294 emit_insn (gen_add3_const (r12, r1, alloc_lo));
32295 emit_insn (gen_nop ());
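/* E.g. with info->total_size == 0x12345678 this computes
   alloc_hi == -0x12340000 and alloc_lo == -0x5678, giving

	addis 12,1,-0x1234
	addi 12,12,-0x5678

   while alloc_lo == 0 yields "addis; nop" and a small frame
   "addi; nop" -- always exactly two insns for the linker to edit.  */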
32298 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
32299 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
32300 ok_label = gen_label_rtx ();
32301 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32302 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
32303 gen_rtx_LABEL_REF (VOIDmode, ok_label),
32304 pc_rtx);
32305 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32306 JUMP_LABEL (insn) = ok_label;
32307 /* Mark the jump as very likely to be taken. */
32308 add_reg_br_prob_note (insn, profile_probability::very_likely ());
32310 lr = gen_rtx_REG (Pmode, LR_REGNO);
32311 insn = emit_move_insn (r0, lr);
32312 RTX_FRAME_RELATED_P (insn) = 1;
32313 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
32314 RTX_FRAME_RELATED_P (insn) = 1;
32316 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
32317 const0_rtx, const0_rtx));
32318 call_fusage = NULL_RTX;
32319 use_reg (&call_fusage, r12);
32320 /* Say the call uses r0, even though it doesn't, to stop regrename
32321 from twiddling with the insns saving lr, trashing args for cfun.
32322 The insns restoring lr are similarly protected by making
32323 split_stack_return use r0. */
32324 use_reg (&call_fusage, r0);
32325 add_function_usage_to (insn, call_fusage);
32326 /* Indicate that this function can't jump to non-local gotos. */
32327 make_reg_eh_region_note_nothrow_nononlocal (insn);
32328 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
32329 insn = emit_move_insn (lr, r0);
32330 add_reg_note (insn, REG_CFA_RESTORE, lr);
32331 RTX_FRAME_RELATED_P (insn) = 1;
32332 emit_insn (gen_split_stack_return ());
32334 emit_label (ok_label);
32335 LABEL_NUSES (ok_label) = 1;
32338 /* Return the internal arg pointer used for function incoming
32339 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32340 to copy it to a pseudo in order for it to be preserved over calls
32341 and suchlike. We'd really like to use a pseudo here for the
32342 internal arg pointer but data-flow analysis is not prepared to
32343 accept pseudos as live at the beginning of a function. */
32345 static rtx
32346 rs6000_internal_arg_pointer (void)
32348 if (flag_split_stack
32349 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
32350 == NULL))
32353 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
32355 rtx pat;
32357 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
32358 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
32360 /* Put the pseudo initialization right after the note at the
32361 beginning of the function. */
32362 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
32363 gen_rtx_REG (Pmode, 12));
32364 push_topmost_sequence ();
32365 emit_insn_after (pat, get_insns ());
32366 pop_topmost_sequence ();
32368 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
32369 FIRST_PARM_OFFSET (current_function_decl));
32371 return virtual_incoming_args_rtx;
32374 /* We may have to tell the dataflow pass that the split stack prologue
32375 is initializing a register. */
32377 static void
32378 rs6000_live_on_entry (bitmap regs)
32380 if (flag_split_stack)
32381 bitmap_set_bit (regs, 12);
32384 /* Emit -fsplit-stack dynamic stack allocation space check. */
32386 void
32387 rs6000_split_stack_space_check (rtx size, rtx label)
32389 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32390 rtx limit = gen_reg_rtx (Pmode);
32391 rtx requested = gen_reg_rtx (Pmode);
32392 rtx cmp = gen_reg_rtx (CCUNSmode);
32393 rtx jump;
32395 emit_insn (gen_load_split_stack_limit (limit));
32396 if (CONST_INT_P (size))
32397 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
32398 else
32400 size = force_reg (Pmode, size);
32401 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
32403 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
32404 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32405 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
32406 gen_rtx_LABEL_REF (VOIDmode, label),
32407 pc_rtx);
32408 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32409 JUMP_LABEL (jump) = label;
32412 /* A C compound statement that outputs the assembler code for a thunk
32413 function, used to implement C++ virtual function calls with
32414 multiple inheritance. The thunk acts as a wrapper around a virtual
32415 function, adjusting the implicit object parameter before handing
32416 control off to the real function.
32418 First, emit code to add the integer DELTA to the location that
32419 contains the incoming first argument. Assume that this argument
32420 contains a pointer, and is the one used to pass the `this' pointer
32421 in C++. This is the incoming argument *before* the function
32422 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32423 values of all other incoming arguments.
32425 After the addition, emit code to jump to FUNCTION, which is a
32426 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32427 not touch the return address. Hence returning from FUNCTION will
32428 return to whoever called the current `thunk'.
32430 The effect must be as if FUNCTION had been called directly with the
32431 adjusted first argument. This macro is responsible for emitting
32432 all of the code for a thunk function; output_function_prologue()
32433 and output_function_epilogue() are not invoked.
32435 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32436 been extracted from it.) It might possibly be useful on some
32437 targets, but probably not.
32439 If you do not define this macro, the target-independent code in the
32440 C++ frontend will generate a less efficient heavyweight thunk that
32441 calls FUNCTION instead of jumping to it. The generic approach does
32442 not support varargs. */
32444 static void
32445 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
32446 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
32447 tree function)
32449 rtx this_rtx, funexp;
32450 rtx_insn *insn;
32452 reload_completed = 1;
32453 epilogue_completed = 1;
32455 /* Mark the end of the (empty) prologue. */
32456 emit_note (NOTE_INSN_PROLOGUE_END);
32458 /* Find the "this" pointer. If the function returns a structure,
32459 the structure return pointer is in r3. */
32460 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
32461 this_rtx = gen_rtx_REG (Pmode, 4);
32462 else
32463 this_rtx = gen_rtx_REG (Pmode, 3);
32465 /* Apply the constant offset, if required. */
32466 if (delta)
32467 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
32469 /* Apply the offset from the vtable, if required. */
32470 if (vcall_offset)
32472 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
32473 rtx tmp = gen_rtx_REG (Pmode, 12);
32475 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
32476 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
32478 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
32479 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
32481 else
32483 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
32485 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
32487 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
32490 /* Generate a tail call to the target function. */
32491 if (!TREE_USED (function))
32493 assemble_external (function);
32494 TREE_USED (function) = 1;
32496 funexp = XEXP (DECL_RTL (function), 0);
32497 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32499 #if TARGET_MACHO
32500 if (MACHOPIC_INDIRECT)
32501 funexp = machopic_indirect_call_target (funexp);
32502 #endif
32504 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32505 generate sibcall RTL explicitly. */
32506 insn = emit_call_insn (
32507 gen_rtx_PARALLEL (VOIDmode,
32508 gen_rtvec (3,
32509 gen_rtx_CALL (VOIDmode,
32510 funexp, const0_rtx),
32511 gen_rtx_USE (VOIDmode, const0_rtx),
32512 simple_return_rtx)));
32513 SIBLING_CALL_P (insn) = 1;
32514 emit_barrier ();
32516 /* Run just enough of rest_of_compilation to get the insns emitted.
32517 There's not really enough bulk here to make other passes such as
32518 instruction scheduling worthwhile. Note that use_thunk calls
32519 assemble_start_function and assemble_end_function. */
32520 insn = get_insns ();
32521 shorten_branches (insn);
32522 final_start_function (insn, file, 1);
32523 final (insn, file, 1);
32524 final_end_function ();
32526 reload_completed = 0;
32527 epilogue_completed = 0;
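#if 0
/* Illustrative sketch, not part of the original file: the `this'
   adjustment the emitted thunk performs, written as ordinary C with
   invented names (`long' standing in for a pointer-sized integer).  */
static void *
thunk_adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;        /* constant DELTA adjustment */
  if (vcall_offset)
    {
      char *vtbl = *(char **) p;              /* *this holds the vtable pointer */
      p += *(long *) (vtbl + vcall_offset);   /* adjustment slot in the vtable */
    }
  return p;                                   /* then tail-call FUNCTION */
}
#endif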
32530 /* A quick summary of the various types of 'constant-pool tables'
32531 under PowerPC:
32533 Target       Flags               Name             One table per
32534 AIX          (none)              AIX TOC          object file
32535 AIX          -mfull-toc          AIX TOC          object file
32536 AIX          -mminimal-toc       AIX minimal TOC  translation unit
32537 SVR4/EABI    (none)              SVR4 SDATA       object file
32538 SVR4/EABI    -fpic               SVR4 pic         object file
32539 SVR4/EABI    -fPIC               SVR4 PIC         translation unit
32540 SVR4/EABI    -mrelocatable       EABI TOC         function
32541 SVR4/EABI    -maix               AIX TOC          object file
32542 SVR4/EABI    -maix -mminimal-toc
32543                                  AIX minimal TOC  translation unit
32545 Name             Reg.  Set by  entries  contains:
32546                                made by  addrs?   fp?      sum?
32548 AIX TOC           2    crt0    as       Y        option   option
32549 AIX minimal TOC  30    prolog  gcc      Y        Y        option
32550 SVR4 SDATA       13    crt0    gcc      N        Y        N
32551 SVR4 pic         30    prolog  ld       Y        not yet  N
32552 SVR4 PIC         30    prolog  gcc      Y        option   option
32553 EABI TOC         30    prolog  gcc      Y        option   option  */
32557 /* Hash functions for the hash table. */
32559 static unsigned
32560 rs6000_hash_constant (rtx k)
32562 enum rtx_code code = GET_CODE (k);
32563 machine_mode mode = GET_MODE (k);
32564 unsigned result = (code << 3) ^ mode;
32565 const char *format;
32566 int flen, fidx;
32568 format = GET_RTX_FORMAT (code);
32569 flen = strlen (format);
32570 fidx = 0;
32572 switch (code)
32574 case LABEL_REF:
32575 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32577 case CONST_WIDE_INT:
32579 int i;
32580 flen = CONST_WIDE_INT_NUNITS (k);
32581 for (i = 0; i < flen; i++)
32582 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32583 return result;
32586 case CONST_DOUBLE:
32587 if (mode != VOIDmode)
32588 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32589 flen = 2;
32590 break;
32592 case CODE_LABEL:
32593 fidx = 3;
32594 break;
32596 default:
32597 break;
32600 for (; fidx < flen; fidx++)
32601 switch (format[fidx])
32603 case 's':
32605 unsigned i, len;
32606 const char *str = XSTR (k, fidx);
32607 len = strlen (str);
32608 result = result * 613 + len;
32609 for (i = 0; i < len; i++)
32610 result = result * 613 + (unsigned) str[i];
32611 break;
32613 case 'u':
32614 case 'e':
32615 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32616 break;
32617 case 'i':
32618 case 'n':
32619 result = result * 613 + (unsigned) XINT (k, fidx);
32620 break;
32621 case 'w':
32622 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32623 result = result * 613 + (unsigned) XWINT (k, fidx);
32624 else
32626 size_t i;
32627 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32628 result = result * 613 + (unsigned) (XWINT (k, fidx)
32629 >> CHAR_BIT * i);
32631 break;
32632 case '0':
32633 break;
32634 default:
32635 gcc_unreachable ();
32638 return result;
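#if 0
/* Illustrative sketch, not part of the original file: the multiplicative
   mixing used above, applied to a plain string as in the 's' case.  The
   constants 613 and 1231 match the function; the helper name is invented.  */
#include <string.h>

static unsigned
hash_string_613 (unsigned result, const char *str)
{
  size_t i, len = strlen (str);
  result = result * 613 + (unsigned) len;      /* mix in the length first */
  for (i = 0; i < len; i++)
    result = result * 613 + (unsigned) str[i]; /* then each character */
  return result;
}
#endif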
32641 hashval_t
32642 toc_hasher::hash (toc_hash_struct *thc)
32644 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32647 /* Compare H1 and H2 for equivalence. */
32649 bool
32650 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32652 rtx r1 = h1->key;
32653 rtx r2 = h2->key;
32655 if (h1->key_mode != h2->key_mode)
32656 return 0;
32658 return rtx_equal_p (r1, r2);
32661 /* These are the names given by the C++ front-end to vtables, and
32662 vtable-like objects. Ideally, this logic should not be here;
32663 instead, there should be some programmatic way of inquiring as
32664 to whether or not an object is a vtable. */
32666 #define VTABLE_NAME_P(NAME) \
32667 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32668 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32669 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32670 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32671 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
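#if 0
/* Illustrative sketch, not part of the original file: VTABLE_NAME_P on a
   sample Itanium-ABI mangled name ("_ZTV4Base" is a vtable; "_ZTI",
   "_ZTT" and "_ZTC" cover typeinfo, VTTs and construction vtables).
   Note the macro body tests the caller's local `name', not its NAME
   parameter, so a variable spelled exactly `name' must be in scope.  */
#include <string.h>

static int
looks_like_vtable (const char *name)
{
  return VTABLE_NAME_P (name);   /* nonzero for "_ZTV4Base" */
}
#endif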
32673 #ifdef NO_DOLLAR_IN_LABEL
32674 /* Return a GGC-allocated character string translating dollar signs in
32675 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
32677 const char *
32678 rs6000_xcoff_strip_dollar (const char *name)
32680 char *strip, *p;
32681 const char *q;
32682 size_t len;
32684 q = (const char *) strchr (name, '$');
32686 if (q == 0 || q == name)
32687 return name;
32689 len = strlen (name);
32690 strip = XALLOCAVEC (char, len + 1);
32691 strcpy (strip, name);
32692 p = strip + (q - name);
32693 while (p)
32695 *p = '_';
32696 p = strchr (p + 1, '$');
32699 return ggc_alloc_string (strip, len);
32701 #endif
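#if 0
/* Illustrative sketch, not part of the original file: the same
   dollar-to-underscore rewrite on a heap copy, without GGC.  Invented
   example: "foo$bar$baz" becomes "foo_bar_baz", while a missing or
   leading '$' leaves the name untouched, matching the early return
   above.  Uses POSIX strdup for brevity.  */
#include <string.h>

static char *
strip_dollar_example (const char *nm)
{
  char *copy = strdup (nm);
  char *p = copy ? strchr (copy, '$') : NULL;
  if (p == NULL || p == copy)        /* no '$', or '$' first: leave alone */
    return copy;
  for (; p; p = strchr (p + 1, '$'))
    *p = '_';
  return copy;
}
#endif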
32703 void
32704 rs6000_output_symbol_ref (FILE *file, rtx x)
32706 const char *name = XSTR (x, 0);
32708 /* Currently C++ toc references to vtables can be emitted before it
32709 is decided whether the vtable is public or private. If this is
32710 the case, then the linker will eventually complain that there is
32711 a reference to an unknown section. Thus, for vtables only,
32712 we emit the TOC reference to reference the identifier and not the
32713 symbol. */
32714 if (VTABLE_NAME_P (name))
32716 RS6000_OUTPUT_BASENAME (file, name);
32718 else
32719 assemble_name (file, name);
32722 /* Output a TOC entry. We derive the entry name from what is being
32723 written. */
32725 void
32726 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32728 char buf[256];
32729 const char *name = buf;
32730 rtx base = x;
32731 HOST_WIDE_INT offset = 0;
32733 gcc_assert (!TARGET_NO_TOC);
32735 /* When the linker won't eliminate them, don't output duplicate
32736 TOC entries (this happens on AIX if there is any kind of TOC,
32737 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32738 CODE_LABELs. */
32739 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32741 struct toc_hash_struct *h;
32743 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32744 time because GGC is not initialized at that point. */
32745 if (toc_hash_table == NULL)
32746 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32748 h = ggc_alloc<toc_hash_struct> ();
32749 h->key = x;
32750 h->key_mode = mode;
32751 h->labelno = labelno;
32753 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32754 if (*found == NULL)
32755 *found = h;
32756 else /* This is indeed a duplicate.
32757 Set this label equal to that label. */
32759 fputs ("\t.set ", file);
32760 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32761 fprintf (file, "%d,", labelno);
32762 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32763 fprintf (file, "%d\n", ((*found)->labelno));
32765 #ifdef HAVE_AS_TLS
32766 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32767 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32768 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32770 fputs ("\t.set ", file);
32771 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32772 fprintf (file, "%d,", labelno);
32773 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32774 fprintf (file, "%d\n", ((*found)->labelno));
32776 #endif
32777 return;
32781 /* If we're going to put a double constant in the TOC, make sure it's
32782 aligned properly when strict alignment is on. */
32783 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32784 && STRICT_ALIGNMENT
32785 && GET_MODE_BITSIZE (mode) >= 64
32786 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
32787 ASM_OUTPUT_ALIGN (file, 3);
32790 (*targetm.asm_out.internal_label) (file, "LC", labelno);
32792 /* Handle FP constants specially. Note that if we have a minimal
32793 TOC, things we put here aren't actually in the TOC, so we can allow
32794 FP constants. */
32795 if (GET_CODE (x) == CONST_DOUBLE &&
32796 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32797 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32799 long k[4];
32801 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32802 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32803 else
32804 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32806 if (TARGET_64BIT)
32808 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32809 fputs (DOUBLE_INT_ASM_OP, file);
32810 else
32811 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32812 k[0] & 0xffffffff, k[1] & 0xffffffff,
32813 k[2] & 0xffffffff, k[3] & 0xffffffff);
32814 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32815 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32816 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32817 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32818 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32819 return;
32821 else
32823 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32824 fputs ("\t.long ", file);
32825 else
32826 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32827 k[0] & 0xffffffff, k[1] & 0xffffffff,
32828 k[2] & 0xffffffff, k[3] & 0xffffffff);
32829 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32830 k[0] & 0xffffffff, k[1] & 0xffffffff,
32831 k[2] & 0xffffffff, k[3] & 0xffffffff);
32832 return;
32835 else if (GET_CODE (x) == CONST_DOUBLE &&
32836 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32838 long k[2];
32840 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32841 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32842 else
32843 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32845 if (TARGET_64BIT)
32847 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32848 fputs (DOUBLE_INT_ASM_OP, file);
32849 else
32850 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32851 k[0] & 0xffffffff, k[1] & 0xffffffff);
32852 fprintf (file, "0x%lx%08lx\n",
32853 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32854 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32855 return;
32857 else
32859 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32860 fputs ("\t.long ", file);
32861 else
32862 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32863 k[0] & 0xffffffff, k[1] & 0xffffffff);
32864 fprintf (file, "0x%lx,0x%lx\n",
32865 k[0] & 0xffffffff, k[1] & 0xffffffff);
32866 return;
32869 else if (GET_CODE (x) == CONST_DOUBLE &&
32870 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32872 long l;
32874 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32875 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32876 else
32877 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32879 if (TARGET_64BIT)
32881 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32882 fputs (DOUBLE_INT_ASM_OP, file);
32883 else
32884 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32885 if (WORDS_BIG_ENDIAN)
32886 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32887 else
32888 fprintf (file, "0x%lx\n", l & 0xffffffff);
32889 return;
32891 else
32893 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32894 fputs ("\t.long ", file);
32895 else
32896 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32897 fprintf (file, "0x%lx\n", l & 0xffffffff);
32898 return;
32901 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32903 unsigned HOST_WIDE_INT low;
32904 HOST_WIDE_INT high;
32906 low = INTVAL (x) & 0xffffffff;
32907 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32909 /* TOC entries are always Pmode-sized, so when big-endian
32910 smaller integer constants in the TOC need to be padded.
32911 (This is still a win over putting the constants in
32912 a separate constant pool, because then we'd have
32913 to have both a TOC entry _and_ the actual constant.)
32915 For a 32-bit target, CONST_INT values are loaded and shifted
32916 entirely within `low' and can be stored in one TOC entry. */
32918 /* It would be easy to make this work, but it doesn't now. */
32919 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32921 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32923 low |= high << 32;
32924 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32925 high = (HOST_WIDE_INT) low >> 32;
32926 low &= 0xffffffff;
32929 if (TARGET_64BIT)
32931 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32932 fputs (DOUBLE_INT_ASM_OP, file);
32933 else
32934 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32935 (long) high & 0xffffffff, (long) low & 0xffffffff);
32936 fprintf (file, "0x%lx%08lx\n",
32937 (long) high & 0xffffffff, (long) low & 0xffffffff);
32938 return;
32940 else
32942 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32944 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32945 fputs ("\t.long ", file);
32946 else
32947 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32948 (long) high & 0xffffffff, (long) low & 0xffffffff);
32949 fprintf (file, "0x%lx,0x%lx\n",
32950 (long) high & 0xffffffff, (long) low & 0xffffffff);
32952 else
32954 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32955 fputs ("\t.long ", file);
32956 else
32957 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32958 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32960 return;
32964 if (GET_CODE (x) == CONST)
32966 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32967 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32969 base = XEXP (XEXP (x, 0), 0);
32970 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32973 switch (GET_CODE (base))
32975 case SYMBOL_REF:
32976 name = XSTR (base, 0);
32977 break;
32979 case LABEL_REF:
32980 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32981 CODE_LABEL_NUMBER (XEXP (base, 0)));
32982 break;
32984 case CODE_LABEL:
32985 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32986 break;
32988 default:
32989 gcc_unreachable ();
32992 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32993 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32994 else
32996 fputs ("\t.tc ", file);
32997 RS6000_OUTPUT_BASENAME (file, name);
32999 if (offset < 0)
33000 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
33001 else if (offset)
33002 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
33004 /* Mark large TOC symbols on AIX with [TE] so they are mapped
33005 after other TOC symbols, reducing overflow of small TOC access
33006 to [TC] symbols. */
33007 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
33008 ? "[TE]," : "[TC],", file);
33011 /* Currently C++ toc references to vtables can be emitted before it
33012 is decided whether the vtable is public or private. If this is
33013 the case, then the linker will eventually complain that there is
33014 a TOC reference to an unknown section. Thus, for vtables only,
33015 we emit the TOC reference to reference the symbol and not the
33016 section. */
33017 if (VTABLE_NAME_P (name))
33019 RS6000_OUTPUT_BASENAME (file, name);
33020 if (offset < 0)
33021 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
33022 else if (offset > 0)
33023 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
33025 else
33026 output_addr_const (file, x);
33028 #if HAVE_AS_TLS
33029 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
33031 switch (SYMBOL_REF_TLS_MODEL (base))
33033 case 0:
33034 break;
33035 case TLS_MODEL_LOCAL_EXEC:
33036 fputs ("@le", file);
33037 break;
33038 case TLS_MODEL_INITIAL_EXEC:
33039 fputs ("@ie", file);
33040 break;
33041 /* Use global-dynamic for local-dynamic. */
33042 case TLS_MODEL_GLOBAL_DYNAMIC:
33043 case TLS_MODEL_LOCAL_DYNAMIC:
33044 putc ('\n', file);
33045 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
33046 fputs ("\t.tc .", file);
33047 RS6000_OUTPUT_BASENAME (file, name);
33048 fputs ("[TC],", file);
33049 output_addr_const (file, x);
33050 fputs ("@m", file);
33051 break;
33052 default:
33053 gcc_unreachable ();
33056 #endif
33058 putc ('\n', file);
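#if 0
/* Illustrative sketch, not part of the original file: the high/low split
   and big-endian padding applied to CONST_INT TOC entries above, with
   invented names and fixed-width types standing in for HOST_WIDE_INT.
   A value narrower than a 64-bit big-endian TOC slot is left-justified
   so a Pmode load finds it where expected.  */
#include <stdint.h>

static void
split_toc_int (int64_t val, unsigned mode_bits, unsigned ptr_bits,
	       int big_endian, uint32_t *hi, uint32_t *lo)
{
  uint64_t low = (uint64_t) val & 0xffffffff;
  int64_t high = val >> 32;
  if (big_endian && ptr_bits > mode_bits)
    {
      low |= (uint64_t) high << 32;
      low <<= ptr_bits - mode_bits;   /* pad on the right */
      high = (int64_t) low >> 32;
      low &= 0xffffffff;
    }
  *hi = (uint32_t) high;
  *lo = (uint32_t) low;
}
#endif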
33061 /* Output an assembler pseudo-op to write an ASCII string of N characters
33062 starting at P to FILE.
33064 On the RS/6000, we have to do this using the .byte operation and
33065 write out special characters outside the quoted string.
33066 Also, the assembler is broken; very long strings are truncated,
33067 so we must artificially break them up early. */
33069 void
33070 output_ascii (FILE *file, const char *p, int n)
33072 char c;
33073 int i, count_string;
33074 const char *for_string = "\t.byte \"";
33075 const char *for_decimal = "\t.byte ";
33076 const char *to_close = NULL;
33078 count_string = 0;
33079 for (i = 0; i < n; i++)
33081 c = *p++;
33082 if (c >= ' ' && c < 0177)
33084 if (for_string)
33085 fputs (for_string, file);
33086 putc (c, file);
33088 /* Write two quotes to get one. */
33089 if (c == '"')
33091 putc (c, file);
33092 ++count_string;
33095 for_string = NULL;
33096 for_decimal = "\"\n\t.byte ";
33097 to_close = "\"\n";
33098 ++count_string;
33100 if (count_string >= 512)
33102 fputs (to_close, file);
33104 for_string = "\t.byte \"";
33105 for_decimal = "\t.byte ";
33106 to_close = NULL;
33107 count_string = 0;
33110 else
33112 if (for_decimal)
33113 fputs (for_decimal, file);
33114 fprintf (file, "%d", c);
33116 for_string = "\n\t.byte \"";
33117 for_decimal = ", ";
33118 to_close = "\n";
33119 count_string = 0;
33123 /* Now close the string if we have written one. Then end the line. */
33124 if (to_close)
33125 fputs (to_close, file);
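#if 0
/* Illustrative sketch, not part of the original file: the quoting rule
   above for printable characters only; non-printables and the 512-byte
   splitting are omitted, and the helper name is invented.  For the
   input a"b this prints:  .byte "a""b"  */
#include <stdio.h>

static void
ascii_directive_example (FILE *f, const char *s)
{
  fputs ("\t.byte \"", f);
  for (; *s; s++)
    {
      putc (*s, f);
      if (*s == '"')     /* write two quotes to get one */
	putc ('"', f);
    }
  fputs ("\"\n", f);
}
#endif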
33128 /* Generate a unique section name for FILENAME for a section type
33129 represented by SECTION_DESC. Output goes into BUF.
33131 SECTION_DESC can be any string, as long as it is different for each
33132 possible section type.
33134 We name the section in the same manner as xlc. The name begins with an
33135 underscore followed by the filename (after stripping any leading directory
33136 names) with the last period replaced by the string SECTION_DESC. If
33137 FILENAME does not contain a period, SECTION_DESC is appended to the end of
33138 the name. */
33140 void
33141 rs6000_gen_section_name (char **buf, const char *filename,
33142 const char *section_desc)
33144 const char *q, *after_last_slash, *last_period = 0;
33145 char *p;
33146 int len;
33148 after_last_slash = filename;
33149 for (q = filename; *q; q++)
33151 if (*q == '/')
33152 after_last_slash = q + 1;
33153 else if (*q == '.')
33154 last_period = q;
33157 len = strlen (after_last_slash) + strlen (section_desc) + 2;
33158 *buf = (char *) xmalloc (len);
33160 p = *buf;
33161 *p++ = '_';
33163 for (q = after_last_slash; *q; q++)
33165 if (q == last_period)
33167 strcpy (p, section_desc);
33168 p += strlen (section_desc);
33169 break;
33172 else if (ISALNUM (*q))
33173 *p++ = *q;
33176 if (last_period == 0)
33177 strcpy (p, section_desc);
33178 else
33179 *p = '\0';
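/* Worked example (invented inputs): FILENAME "src/foo.c" with
   SECTION_DESC "ro_" yields "_fooro_" - the leading directories and
   everything from the last period onwards are dropped, only
   alphanumeric characters are kept, and SECTION_DESC stands in for
   the period.  Without a period, "src/foo" would also yield
   "_fooro_", since SECTION_DESC is then appended at the end.  */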
33182 /* Emit profile function. */
33184 void
33185 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
33187 /* Non-standard profiling for kernels, which just saves LR then calls
33188 _mcount without worrying about arg saves. The idea is to change
33189 the function prologue as little as possible as it isn't easy to
33190 account for arg save/restore code added just for _mcount. */
33191 if (TARGET_PROFILE_KERNEL)
33192 return;
33194 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33196 #ifndef NO_PROFILE_COUNTERS
33197 # define NO_PROFILE_COUNTERS 0
33198 #endif
33199 if (NO_PROFILE_COUNTERS)
33200 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33201 LCT_NORMAL, VOIDmode);
33202 else
33204 char buf[30];
33205 const char *label_name;
33206 rtx fun;
33208 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33209 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
33210 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
33212 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33213 LCT_NORMAL, VOIDmode, fun, Pmode);
33216 else if (DEFAULT_ABI == ABI_DARWIN)
33218 const char *mcount_name = RS6000_MCOUNT;
33219 int caller_addr_regno = LR_REGNO;
33221 /* Be conservative and always set this, at least for now. */
33222 crtl->uses_pic_offset_table = 1;
33224 #if TARGET_MACHO
33225 /* For PIC code, set up a stub and collect the caller's address
33226 from r0, which is where the prologue puts it. */
33227 if (MACHOPIC_INDIRECT
33228 && crtl->uses_pic_offset_table)
33229 caller_addr_regno = 0;
33230 #endif
33231 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
33232 LCT_NORMAL, VOIDmode,
33233 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
33237 /* Write function profiler code. */
33239 void
33240 output_function_profiler (FILE *file, int labelno)
33242 char buf[100];
33244 switch (DEFAULT_ABI)
33246 default:
33247 gcc_unreachable ();
33249 case ABI_V4:
33250 if (!TARGET_32BIT)
33252 warning (0, "no profiling of 64-bit code for this ABI");
33253 return;
33255 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33256 fprintf (file, "\tmflr %s\n", reg_names[0]);
33257 if (NO_PROFILE_COUNTERS)
33259 asm_fprintf (file, "\tstw %s,4(%s)\n",
33260 reg_names[0], reg_names[1]);
33262 else if (TARGET_SECURE_PLT && flag_pic)
33264 if (TARGET_LINK_STACK)
33266 char name[32];
33267 get_ppc476_thunk_name (name);
33268 asm_fprintf (file, "\tbl %s\n", name);
33270 else
33271 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
33272 asm_fprintf (file, "\tstw %s,4(%s)\n",
33273 reg_names[0], reg_names[1]);
33274 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33275 asm_fprintf (file, "\taddis %s,%s,",
33276 reg_names[12], reg_names[12]);
33277 assemble_name (file, buf);
33278 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
33279 assemble_name (file, buf);
33280 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
33282 else if (flag_pic == 1)
33284 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
33285 asm_fprintf (file, "\tstw %s,4(%s)\n",
33286 reg_names[0], reg_names[1]);
33287 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33288 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
33289 assemble_name (file, buf);
33290 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
33292 else if (flag_pic > 1)
33294 asm_fprintf (file, "\tstw %s,4(%s)\n",
33295 reg_names[0], reg_names[1]);
33296 /* Now, we need to get the address of the label. */
33297 if (TARGET_LINK_STACK)
33299 char name[32];
33300 get_ppc476_thunk_name (name);
33301 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
33302 assemble_name (file, buf);
33303 fputs ("-.\n1:", file);
33304 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33305 asm_fprintf (file, "\taddi %s,%s,4\n",
33306 reg_names[11], reg_names[11]);
33308 else
33310 fputs ("\tbcl 20,31,1f\n\t.long ", file);
33311 assemble_name (file, buf);
33312 fputs ("-.\n1:", file);
33313 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33315 asm_fprintf (file, "\tlwz %s,0(%s)\n",
33316 reg_names[0], reg_names[11]);
33317 asm_fprintf (file, "\tadd %s,%s,%s\n",
33318 reg_names[0], reg_names[0], reg_names[11]);
33320 else
33322 asm_fprintf (file, "\tlis %s,", reg_names[12]);
33323 assemble_name (file, buf);
33324 fputs ("@ha\n", file);
33325 asm_fprintf (file, "\tstw %s,4(%s)\n",
33326 reg_names[0], reg_names[1]);
33327 asm_fprintf (file, "\tla %s,", reg_names[0]);
33328 assemble_name (file, buf);
33329 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
33332 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33333 fprintf (file, "\tbl %s%s\n",
33334 RS6000_MCOUNT, flag_pic ? "@plt" : "");
33335 break;
33337 case ABI_AIX:
33338 case ABI_ELFv2:
33339 case ABI_DARWIN:
33340 /* Don't do anything, done in output_profile_hook (). */
33341 break;
33347 /* The following variable value is the last issued insn. */
33349 static rtx_insn *last_scheduled_insn;
33351 /* The following variable helps to balance issuing of load and
33352 store instructions */
33354 static int load_store_pendulum;
33356 /* The following variable helps pair divide insns during scheduling. */
33357 static int divide_cnt;
33358 /* The following variable helps pair and alternate vector and vector load
33359 insns during scheduling. */
33360 static int vec_pairing;
33363 /* Power4 load update and store update instructions are cracked into a
33364 load or store and an integer insn which are executed in the same cycle.
33365 Branches have their own dispatch slot which does not count against the
33366 GCC issue rate, but it changes the program flow so there are no other
33367 instructions to issue in this cycle. */
33369 static int
33370 rs6000_variable_issue_1 (rtx_insn *insn, int more)
33372 last_scheduled_insn = insn;
33373 if (GET_CODE (PATTERN (insn)) == USE
33374 || GET_CODE (PATTERN (insn)) == CLOBBER)
33376 cached_can_issue_more = more;
33377 return cached_can_issue_more;
33380 if (insn_terminates_group_p (insn, current_group))
33382 cached_can_issue_more = 0;
33383 return cached_can_issue_more;
33386 /* Insns with no reservation (i.e. not recognized) consume no issue slot. */
33387 if (recog_memoized (insn) < 0)
33388 return more;
33390 if (rs6000_sched_groups)
33392 if (is_microcoded_insn (insn))
33393 cached_can_issue_more = 0;
33394 else if (is_cracked_insn (insn))
33395 cached_can_issue_more = more > 2 ? more - 2 : 0;
33396 else
33397 cached_can_issue_more = more - 1;
33399 return cached_can_issue_more;
33402 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
33403 return 0;
33405 cached_can_issue_more = more - 1;
33406 return cached_can_issue_more;
33409 static int
33410 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
33412 int r = rs6000_variable_issue_1 (insn, more);
33413 if (verbose)
33414 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
33415 return r;
33418 /* Adjust the cost of a scheduling dependency. Return the new cost of
33419 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
33421 static int
33422 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
33423 unsigned int)
33425 enum attr_type attr_type;
33427 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
33428 return cost;
33430 switch (dep_type)
33432 case REG_DEP_TRUE:
33434 /* Data dependency; DEP_INSN writes a register that INSN reads
33435 some cycles later. */
33437 /* Separate a load from a narrower, dependent store. */
33438 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
33439 && GET_CODE (PATTERN (insn)) == SET
33440 && GET_CODE (PATTERN (dep_insn)) == SET
33441 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
33442 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
33443 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
33444 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
33445 return cost + 14;
33447 attr_type = get_attr_type (insn);
33449 switch (attr_type)
33451 case TYPE_JMPREG:
33452 /* Tell the first scheduling pass about the latency between
33453 a mtctr and bctr (and mtlr and br/blr). The first
33454 scheduling pass will not know about this latency since
33455 the mtctr instruction, which has the latency associated
33456 to it, will be generated by reload. */
33457 return 4;
33458 case TYPE_BRANCH:
33459 /* Leave some extra cycles between a compare and its
33460 dependent branch, to inhibit expensive mispredicts. */
33461 if ((rs6000_cpu_attr == CPU_PPC603
33462 || rs6000_cpu_attr == CPU_PPC604
33463 || rs6000_cpu_attr == CPU_PPC604E
33464 || rs6000_cpu_attr == CPU_PPC620
33465 || rs6000_cpu_attr == CPU_PPC630
33466 || rs6000_cpu_attr == CPU_PPC750
33467 || rs6000_cpu_attr == CPU_PPC7400
33468 || rs6000_cpu_attr == CPU_PPC7450
33469 || rs6000_cpu_attr == CPU_PPCE5500
33470 || rs6000_cpu_attr == CPU_PPCE6500
33471 || rs6000_cpu_attr == CPU_POWER4
33472 || rs6000_cpu_attr == CPU_POWER5
33473 || rs6000_cpu_attr == CPU_POWER7
33474 || rs6000_cpu_attr == CPU_POWER8
33475 || rs6000_cpu_attr == CPU_POWER9
33476 || rs6000_cpu_attr == CPU_CELL)
33477 && recog_memoized (dep_insn)
33478 && (INSN_CODE (dep_insn) >= 0))
33480 switch (get_attr_type (dep_insn))
33482 case TYPE_CMP:
33483 case TYPE_FPCOMPARE:
33484 case TYPE_CR_LOGICAL:
33485 case TYPE_DELAYED_CR:
33486 return cost + 2;
33487 case TYPE_EXTS:
33488 case TYPE_MUL:
33489 if (get_attr_dot (dep_insn) == DOT_YES)
33490 return cost + 2;
33491 else
33492 break;
33493 case TYPE_SHIFT:
33494 if (get_attr_dot (dep_insn) == DOT_YES
33495 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33496 return cost + 2;
33497 else
33498 break;
33499 default:
33500 break;
33502 break;
33504 case TYPE_STORE:
33505 case TYPE_FPSTORE:
33506 if ((rs6000_cpu == PROCESSOR_POWER6)
33507 && recog_memoized (dep_insn)
33508 && (INSN_CODE (dep_insn) >= 0))
33511 if (GET_CODE (PATTERN (insn)) != SET)
33512 /* If this happens, we have to extend this to schedule
33513 optimally. Return default for now. */
33514 return cost;
33516 /* Adjust the cost for the case where the value written
33517 by a fixed point operation is used as the address
33518 gen value on a store. */
33519 switch (get_attr_type (dep_insn))
33521 case TYPE_LOAD:
33522 case TYPE_CNTLZ:
33524 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33525 return get_attr_sign_extend (dep_insn)
33526 == SIGN_EXTEND_YES ? 6 : 4;
33527 break;
33529 case TYPE_SHIFT:
33531 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33532 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33533 6 : 3;
33534 break;
33536 case TYPE_INTEGER:
33537 case TYPE_ADD:
33538 case TYPE_LOGICAL:
33539 case TYPE_EXTS:
33540 case TYPE_INSERT:
33542 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33543 return 3;
33544 break;
33546 case TYPE_STORE:
33547 case TYPE_FPLOAD:
33548 case TYPE_FPSTORE:
33550 if (get_attr_update (dep_insn) == UPDATE_YES
33551 && ! rs6000_store_data_bypass_p (dep_insn, insn))
33552 return 3;
33553 break;
33555 case TYPE_MUL:
33557 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33558 return 17;
33559 break;
33561 case TYPE_DIV:
33563 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33564 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33565 break;
33567 default:
33568 break;
33571 break;
33573 case TYPE_LOAD:
33574 if ((rs6000_cpu == PROCESSOR_POWER6)
33575 && recog_memoized (dep_insn)
33576 && (INSN_CODE (dep_insn) >= 0))
33579 /* Adjust the cost for the case where the value written
33580 by a fixed point instruction is used within the address
33581 gen portion of a subsequent load(u)(x).  */
33582 switch (get_attr_type (dep_insn))
33584 case TYPE_LOAD:
33585 case TYPE_CNTLZ:
33587 if (set_to_load_agen (dep_insn, insn))
33588 return get_attr_sign_extend (dep_insn)
33589 == SIGN_EXTEND_YES ? 6 : 4;
33590 break;
33592 case TYPE_SHIFT:
33594 if (set_to_load_agen (dep_insn, insn))
33595 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33596 6 : 3;
33597 break;
33599 case TYPE_INTEGER:
33600 case TYPE_ADD:
33601 case TYPE_LOGICAL:
33602 case TYPE_EXTS:
33603 case TYPE_INSERT:
33605 if (set_to_load_agen (dep_insn, insn))
33606 return 3;
33607 break;
33609 case TYPE_STORE:
33610 case TYPE_FPLOAD:
33611 case TYPE_FPSTORE:
33613 if (get_attr_update (dep_insn) == UPDATE_YES
33614 && set_to_load_agen (dep_insn, insn))
33615 return 3;
33616 break;
33618 case TYPE_MUL:
33620 if (set_to_load_agen (dep_insn, insn))
33621 return 17;
33622 break;
33624 case TYPE_DIV:
33626 if (set_to_load_agen (dep_insn, insn))
33627 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33628 break;
33630 default:
33631 break;
33634 break;
33636 case TYPE_FPLOAD:
33637 if ((rs6000_cpu == PROCESSOR_POWER6)
33638 && get_attr_update (insn) == UPDATE_NO
33639 && recog_memoized (dep_insn)
33640 && (INSN_CODE (dep_insn) >= 0)
33641 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33642 return 2;
33644 default:
33645 break;
33648 /* Fall out to return default cost. */
33650 break;
33652 case REG_DEP_OUTPUT:
33653 /* Output dependency; DEP_INSN writes a register that INSN writes some
33654 cycles later. */
33655 if ((rs6000_cpu == PROCESSOR_POWER6)
33656 && recog_memoized (dep_insn)
33657 && (INSN_CODE (dep_insn) >= 0))
33659 attr_type = get_attr_type (insn);
33661 switch (attr_type)
33663 case TYPE_FP:
33664 case TYPE_FPSIMPLE:
33665 if (get_attr_type (dep_insn) == TYPE_FP
33666 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33667 return 1;
33668 break;
33669 case TYPE_FPLOAD:
33670 if (get_attr_update (insn) == UPDATE_NO
33671 && get_attr_type (dep_insn) == TYPE_MFFGPR)
33672 return 2;
33673 break;
33674 default:
33675 break;
33678 /* Fall through, no cost for output dependency. */
33679 /* FALLTHRU */
33681 case REG_DEP_ANTI:
33682 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33683 cycles later. */
33684 return 0;
33686 default:
33687 gcc_unreachable ();
33690 return cost;
33693 /* Debug version of rs6000_adjust_cost. */
33695 static int
33696 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33697 int cost, unsigned int dw)
33699 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33701 if (ret != cost)
33703 const char *dep;
33705 switch (dep_type)
33707 default: dep = "unknown dependency"; break;
33708 case REG_DEP_TRUE: dep = "data dependency"; break;
33709 case REG_DEP_OUTPUT: dep = "output dependency"; break;
33710 case REG_DEP_ANTI: dep = "anti dependency"; break;
33713 fprintf (stderr,
33714 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33715 "%s, insn:\n", ret, cost, dep);
33717 debug_rtx (insn);
33720 return ret;
33723 /* The function returns true if INSN is microcoded.
33724 Return false otherwise. */
33726 static bool
33727 is_microcoded_insn (rtx_insn *insn)
33729 if (!insn || !NONDEBUG_INSN_P (insn)
33730 || GET_CODE (PATTERN (insn)) == USE
33731 || GET_CODE (PATTERN (insn)) == CLOBBER)
33732 return false;
33734 if (rs6000_cpu_attr == CPU_CELL)
33735 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33737 if (rs6000_sched_groups
33738 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33740 enum attr_type type = get_attr_type (insn);
33741 if ((type == TYPE_LOAD
33742 && get_attr_update (insn) == UPDATE_YES
33743 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33744 || ((type == TYPE_LOAD || type == TYPE_STORE)
33745 && get_attr_update (insn) == UPDATE_YES
33746 && get_attr_indexed (insn) == INDEXED_YES)
33747 || type == TYPE_MFCR)
33748 return true;
33751 return false;
33754 /* The function returns true if INSN is cracked into 2 instructions
33755 by the processor (and therefore occupies 2 issue slots). */
33757 static bool
33758 is_cracked_insn (rtx_insn *insn)
33760 if (!insn || !NONDEBUG_INSN_P (insn)
33761 || GET_CODE (PATTERN (insn)) == USE
33762 || GET_CODE (PATTERN (insn)) == CLOBBER)
33763 return false;
33765 if (rs6000_sched_groups
33766 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33768 enum attr_type type = get_attr_type (insn);
33769 if ((type == TYPE_LOAD
33770 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33771 && get_attr_update (insn) == UPDATE_NO)
33772 || (type == TYPE_LOAD
33773 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33774 && get_attr_update (insn) == UPDATE_YES
33775 && get_attr_indexed (insn) == INDEXED_NO)
33776 || (type == TYPE_STORE
33777 && get_attr_update (insn) == UPDATE_YES
33778 && get_attr_indexed (insn) == INDEXED_NO)
33779 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33780 && get_attr_update (insn) == UPDATE_YES)
33781 || type == TYPE_DELAYED_CR
33782 || (type == TYPE_EXTS
33783 && get_attr_dot (insn) == DOT_YES)
33784 || (type == TYPE_SHIFT
33785 && get_attr_dot (insn) == DOT_YES
33786 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33787 || (type == TYPE_MUL
33788 && get_attr_dot (insn) == DOT_YES)
33789 || type == TYPE_DIV
33790 || (type == TYPE_INSERT
33791 && get_attr_size (insn) == SIZE_32))
33792 return true;
33795 return false;
33798 /* The function returns true if INSN can be issued only from
33799 the branch slot. */
33801 static bool
33802 is_branch_slot_insn (rtx_insn *insn)
33804 if (!insn || !NONDEBUG_INSN_P (insn)
33805 || GET_CODE (PATTERN (insn)) == USE
33806 || GET_CODE (PATTERN (insn)) == CLOBBER)
33807 return false;
33809 if (rs6000_sched_groups)
33811 enum attr_type type = get_attr_type (insn);
33812 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33813 return true;
33814 return false;
33817 return false;
33820 /* The function returns true if out_insn sets a value that is
33821 used in the address generation computation of in_insn */
33822 static bool
33823 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33825 rtx out_set, in_set;
33827 /* For performance reasons, only handle the simple case where
33828 both insns are a single_set. */
33829 out_set = single_set (out_insn);
33830 if (out_set)
33832 in_set = single_set (in_insn);
33833 if (in_set)
33834 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33837 return false;
33840 /* Try to determine base/offset/size parts of the given MEM.
33841 Return true if successful, false if the values couldn't all
33842 be determined.
33844 This function only looks for REG or REG+CONST address forms.
33845 REG+REG address form will return false. */
33847 static bool
33848 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33849 HOST_WIDE_INT *size)
33851 rtx addr_rtx;
33852 if (MEM_SIZE_KNOWN_P (mem))
33853 *size = MEM_SIZE (mem);
33854 else
33855 return false;
33857 addr_rtx = (XEXP (mem, 0));
33858 if (GET_CODE (addr_rtx) == PRE_MODIFY)
33859 addr_rtx = XEXP (addr_rtx, 1);
33861 *offset = 0;
33862 while (GET_CODE (addr_rtx) == PLUS
33863 && CONST_INT_P (XEXP (addr_rtx, 1)))
33865 *offset += INTVAL (XEXP (addr_rtx, 1));
33866 addr_rtx = XEXP (addr_rtx, 0);
33868 if (!REG_P (addr_rtx))
33869 return false;
33871 *base = addr_rtx;
33872 return true;
33875 /* Return true if the target storage location of mem1 is adjacent
33876 to the target storage location of mem2. */
33879 static bool
33880 adjacent_mem_locations (rtx mem1, rtx mem2)
33882 rtx reg1, reg2;
33883 HOST_WIDE_INT off1, size1, off2, size2;
33885 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33886 && get_memref_parts (mem2, &reg2, &off2, &size2))
33887 return ((REGNO (reg1) == REGNO (reg2))
33888 && ((off1 + size1 == off2)
33889 || (off2 + size2 == off1)));
33891 return false;
33894 /* This function returns true if it can be determined that the two MEM
33895 locations overlap by at least 1 byte based on base reg/offset/size. */
33897 static bool
33898 mem_locations_overlap (rtx mem1, rtx mem2)
33900 rtx reg1, reg2;
33901 HOST_WIDE_INT off1, size1, off2, size2;
33903 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33904 && get_memref_parts (mem2, &reg2, &off2, &size2))
33905 return ((REGNO (reg1) == REGNO (reg2))
33906 && (((off1 <= off2) && (off1 + size1 > off2))
33907 || ((off2 <= off1) && (off2 + size2 > off1))));
33909 return false;
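#if 0
/* Illustrative sketch, not part of the original file: the interval tests
   above on plain integers with invented names.  [off1, off1+size1) and
   [off2, off2+size2) are adjacent when one ends where the other starts,
   and overlap when each starts before the other ends.  */
#include <stdbool.h>

static bool
intervals_adjacent (long off1, long size1, long off2, long size2)
{
  return off1 + size1 == off2 || off2 + size2 == off1;
}

static bool
intervals_overlap (long off1, long size1, long off2, long size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}
#endif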
33912 /* A C statement (sans semicolon) to update the integer scheduling
33913 priority INSN_PRIORITY (INSN). Increase the priority to execute the
33914 INSN earlier, reduce the priority to execute INSN later. Do not
33915 define this macro if you do not need to adjust the scheduling
33916 priorities of insns. */
33918 static int
33919 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33921 rtx load_mem, str_mem;
33922 /* On machines (like the 750) which have asymmetric integer units,
33923 where one integer unit can do multiplies and divides and the other
33924 can't, reduce the priority of multiply/divide so it is scheduled
33925 before other integer operations. */
33927 #if 0
33928 if (! INSN_P (insn))
33929 return priority;
33931 if (GET_CODE (PATTERN (insn)) == USE)
33932 return priority;
33934 switch (rs6000_cpu_attr) {
33935 case CPU_PPC750:
33936 switch (get_attr_type (insn))
33938 default:
33939 break;
33941 case TYPE_MUL:
33942 case TYPE_DIV:
33943 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33944 priority, priority);
33945 if (priority >= 0 && priority < 0x01000000)
33946 priority >>= 3;
33947 break;
33950 #endif
33952 if (insn_must_be_first_in_group (insn)
33953 && reload_completed
33954 && current_sched_info->sched_max_insns_priority
33955 && rs6000_sched_restricted_insns_priority)
33958 /* Prioritize insns that can be dispatched only in the first
33959 dispatch slot. */
33960 if (rs6000_sched_restricted_insns_priority == 1)
33961 /* Attach highest priority to insn. This means that in
33962 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33963 precede 'priority' (critical path) considerations. */
33964 return current_sched_info->sched_max_insns_priority;
33965 else if (rs6000_sched_restricted_insns_priority == 2)
33966 /* Increase priority of insn by a minimal amount. This means that in
33967 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33968 considerations precede dispatch-slot restriction considerations. */
33969 return (priority + 1);
33972 if (rs6000_cpu == PROCESSOR_POWER6
33973 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33974 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33975 /* Attach highest priority to insn if the scheduler has just issued two
33976 stores and this instruction is a load, or two loads and this instruction
33977 is a store. Power6 wants loads and stores scheduled alternately
33978 when possible */
33979 return current_sched_info->sched_max_insns_priority;
33981 return priority;
33984 /* Return true if the instruction is nonpipelined on the Cell. */
33985 static bool
33986 is_nonpipeline_insn (rtx_insn *insn)
33988 enum attr_type type;
33989 if (!insn || !NONDEBUG_INSN_P (insn)
33990 || GET_CODE (PATTERN (insn)) == USE
33991 || GET_CODE (PATTERN (insn)) == CLOBBER)
33992 return false;
33994 type = get_attr_type (insn);
33995 if (type == TYPE_MUL
33996 || type == TYPE_DIV
33997 || type == TYPE_SDIV
33998 || type == TYPE_DDIV
33999 || type == TYPE_SSQRT
34000 || type == TYPE_DSQRT
34001 || type == TYPE_MFCR
34002 || type == TYPE_MFCRF
34003 || type == TYPE_MFJMPR)
34005 return true;
34007 return false;
34011 /* Return how many instructions the machine can issue per cycle. */
34013 static int
34014 rs6000_issue_rate (void)
34016 /* Unless scheduling for register pressure, use issue rate of 1 for
34017 first scheduling pass to decrease degradation. */
34018 if (!reload_completed && !flag_sched_pressure)
34019 return 1;
34021 switch (rs6000_cpu_attr) {
34022 case CPU_RS64A:
34023 case CPU_PPC601: /* ? */
34024 case CPU_PPC7450:
34025 return 3;
34026 case CPU_PPC440:
34027 case CPU_PPC603:
34028 case CPU_PPC750:
34029 case CPU_PPC7400:
34030 case CPU_PPC8540:
34031 case CPU_PPC8548:
34032 case CPU_CELL:
34033 case CPU_PPCE300C2:
34034 case CPU_PPCE300C3:
34035 case CPU_PPCE500MC:
34036 case CPU_PPCE500MC64:
34037 case CPU_PPCE5500:
34038 case CPU_PPCE6500:
34039 case CPU_TITAN:
34040 return 2;
34041 case CPU_PPC476:
34042 case CPU_PPC604:
34043 case CPU_PPC604E:
34044 case CPU_PPC620:
34045 case CPU_PPC630:
34046 return 4;
34047 case CPU_POWER4:
34048 case CPU_POWER5:
34049 case CPU_POWER6:
34050 case CPU_POWER7:
34051 return 5;
34052 case CPU_POWER8:
34053 return 7;
34054 case CPU_POWER9:
34055 return 6;
34056 default:
34057 return 1;
34061 /* Return how many instructions to look ahead for better insn
34062 scheduling. */
34064 static int
34065 rs6000_use_sched_lookahead (void)
34067 switch (rs6000_cpu_attr)
34069 case CPU_PPC8540:
34070 case CPU_PPC8548:
34071 return 4;
34073 case CPU_CELL:
34074 return (reload_completed ? 8 : 0);
34076 default:
34077 return 0;
34081 /* We are choosing insn from the ready queue. Return zero if INSN can be
34082 chosen. */
34083 static int
34084 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
34086 if (ready_index == 0)
34087 return 0;
34089 if (rs6000_cpu_attr != CPU_CELL)
34090 return 0;
34092 gcc_assert (insn != NULL_RTX && INSN_P (insn));
34094 if (!reload_completed
34095 || is_nonpipeline_insn (insn)
34096 || is_microcoded_insn (insn))
34097 return 1;
34099 return 0;
34102 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
34103 and return true. */
34105 static bool
34106 find_mem_ref (rtx pat, rtx *mem_ref)
34108 const char * fmt;
34109 int i, j;
34111 /* stack_tie does not produce any real memory traffic. */
34112 if (tie_operand (pat, VOIDmode))
34113 return false;
34115 if (GET_CODE (pat) == MEM)
34117 *mem_ref = pat;
34118 return true;
34121 /* Recursively process the pattern. */
34122 fmt = GET_RTX_FORMAT (GET_CODE (pat));
34124 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
34126 if (fmt[i] == 'e')
34128 if (find_mem_ref (XEXP (pat, i), mem_ref))
34129 return true;
34131 else if (fmt[i] == 'E')
34132 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
34134 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
34135 return true;
34139 return false;
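#if 0
/* Illustrative sketch, not part of the original file: the recursive walk
   above on a toy tree type with invented names, returning the first
   MEM-like node found, depth-first.  */
#include <stdbool.h>

struct toy_node
{
  bool is_mem;
  int nkids;
  struct toy_node **kids;
};

static bool
toy_find_mem (struct toy_node *n, struct toy_node **mem_ref)
{
  if (n->is_mem)
    {
      *mem_ref = n;
      return true;
    }
  for (int i = 0; i < n->nkids; i++)
    if (toy_find_mem (n->kids[i], mem_ref))
      return true;
  return false;
}
#endif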
34142 /* Determine if PAT is a PATTERN of a load insn. */
34144 static bool
34145 is_load_insn1 (rtx pat, rtx *load_mem)
34147 if (!pat || pat == NULL_RTX)
34148 return false;
34150 if (GET_CODE (pat) == SET)
34151 return find_mem_ref (SET_SRC (pat), load_mem);
34153 if (GET_CODE (pat) == PARALLEL)
34155 int i;
34157 for (i = 0; i < XVECLEN (pat, 0); i++)
34158 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
34159 return true;
34162 return false;
34165 /* Determine if INSN loads from memory. */
34167 static bool
34168 is_load_insn (rtx insn, rtx *load_mem)
34170 if (!insn || !INSN_P (insn))
34171 return false;
34173 if (CALL_P (insn))
34174 return false;
34176 return is_load_insn1 (PATTERN (insn), load_mem);
34179 /* Determine if PAT is a PATTERN of a store insn. */
34181 static bool
34182 is_store_insn1 (rtx pat, rtx *str_mem)
34184 if (!pat || pat == NULL_RTX)
34185 return false;
34187 if (GET_CODE (pat) == SET)
34188 return find_mem_ref (SET_DEST (pat), str_mem);
34190 if (GET_CODE (pat) == PARALLEL)
34192 int i;
34194 for (i = 0; i < XVECLEN (pat, 0); i++)
34195 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
34196 return true;
34199 return false;
34202 /* Determine if INSN stores to memory. */
34204 static bool
34205 is_store_insn (rtx insn, rtx *str_mem)
34207 if (!insn || !INSN_P (insn))
34208 return false;
34210 return is_store_insn1 (PATTERN (insn), str_mem);
34213 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34215 static bool
34216 is_power9_pairable_vec_type (enum attr_type type)
34218 switch (type)
34220 case TYPE_VECSIMPLE:
34221 case TYPE_VECCOMPLEX:
34222 case TYPE_VECDIV:
34223 case TYPE_VECCMP:
34224 case TYPE_VECPERM:
34225 case TYPE_VECFLOAT:
34226 case TYPE_VECFDIV:
34227 case TYPE_VECDOUBLE:
34228 return true;
34229 default:
34230 break;
34232 return false;
34235 /* Returns whether the dependence between INSN and NEXT is considered
34236 costly by the given target. */
34238 static bool
34239 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
34241 rtx insn;
34242 rtx next;
34243 rtx load_mem, str_mem;
34245 /* If the flag is not enabled - no dependence is considered costly;
34246 allow all dependent insns in the same group.
34247 This is the most aggressive option. */
34248 if (rs6000_sched_costly_dep == no_dep_costly)
34249 return false;
34251 /* If the flag is set to 1 - a dependence is always considered costly;
34252 do not allow dependent instructions in the same group.
34253 This is the most conservative option. */
34254 if (rs6000_sched_costly_dep == all_deps_costly)
34255 return true;
34257 insn = DEP_PRO (dep);
34258 next = DEP_CON (dep);
34260 if (rs6000_sched_costly_dep == store_to_load_dep_costly
34261 && is_load_insn (next, &load_mem)
34262 && is_store_insn (insn, &str_mem))
34263 /* Prevent load after store in the same group. */
34264 return true;
34266 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
34267 && is_load_insn (next, &load_mem)
34268 && is_store_insn (insn, &str_mem)
34269 && DEP_TYPE (dep) == REG_DEP_TRUE
34270 && mem_locations_overlap(str_mem, load_mem))
34271 /* Prevent load after store in the same group if it is a true
34272 dependence. */
34273 return true;
34275 /* The flag is set to X; dependences with latency >= X are considered costly,
34276 and will not be scheduled in the same group. */
34277 if (rs6000_sched_costly_dep <= max_dep_latency
34278 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
34279 return true;
34281 return false;
34284 /* Return the next insn after INSN that is found before TAIL is reached,
34285 skipping any "non-active" insns - insns that will not actually occupy
34286 an issue slot. Return NULL_RTX if such an insn is not found. */
34288 static rtx_insn *
34289 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
34291 if (insn == NULL_RTX || insn == tail)
34292 return NULL;
34294 while (1)
34296 insn = NEXT_INSN (insn);
34297 if (insn == NULL_RTX || insn == tail)
34298 return NULL;
34300 if (CALL_P (insn)
34301 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
34302 || (NONJUMP_INSN_P (insn)
34303 && GET_CODE (PATTERN (insn)) != USE
34304 && GET_CODE (PATTERN (insn)) != CLOBBER
34305 && INSN_CODE (insn) != CODE_FOR_stack_tie))
34306 break;
34308 return insn;
34311 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34313 static int
34314 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
34316 int pos;
34317 int i;
34318 rtx_insn *tmp;
34319 enum attr_type type, type2;
34321 type = get_attr_type (last_scheduled_insn);
34323 /* Try to issue fixed point divides back-to-back in pairs so they will be
34324 routed to separate execution units and execute in parallel. */
34325 if (type == TYPE_DIV && divide_cnt == 0)
34327 /* First divide has been scheduled. */
34328 divide_cnt = 1;
34330 /* Scan the ready list looking for another divide, if found move it
34331 to the end of the list so it is chosen next. */
34332 pos = lastpos;
34333 while (pos >= 0)
34335 if (recog_memoized (ready[pos]) >= 0
34336 && get_attr_type (ready[pos]) == TYPE_DIV)
34338 tmp = ready[pos];
34339 for (i = pos; i < lastpos; i++)
34340 ready[i] = ready[i + 1];
34341 ready[lastpos] = tmp;
34342 break;
34344 pos--;
34347 else
34349 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34350 divide_cnt = 0;
34352 /* The best dispatch throughput for vector and vector load insns can be
34353 achieved by interleaving a vector and vector load such that they'll
34354 dispatch to the same superslice. If this pairing cannot be achieved
34355 then it is best to pair vector insns together and vector load insns
34356 together.
34358 To aid in this pairing, vec_pairing maintains the current state with
34359 the following values:
34361 0 : Initial state, no vecload/vector pairing has been started.
34363 1 : A vecload or vector insn has been issued and a candidate for
34364 pairing has been found and moved to the end of the ready
34365 list. */
34366 if (type == TYPE_VECLOAD)
34368 /* Issued a vecload. */
34369 if (vec_pairing == 0)
34371 int vecload_pos = -1;
34372 /* We issued a single vecload, look for a vector insn to pair it
34373 with. If one isn't found, try to pair another vecload. */
34374 pos = lastpos;
34375 while (pos >= 0)
34377 if (recog_memoized (ready[pos]) >= 0)
34379 type2 = get_attr_type (ready[pos]);
34380 if (is_power9_pairable_vec_type (type2))
34382 /* Found a vector insn to pair with, move it to the
34383 end of the ready list so it is scheduled next. */
34384 tmp = ready[pos];
34385 for (i = pos; i < lastpos; i++)
34386 ready[i] = ready[i + 1];
34387 ready[lastpos] = tmp;
34388 vec_pairing = 1;
34389 return cached_can_issue_more;
34391 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
34392 /* Remember position of first vecload seen. */
34393 vecload_pos = pos;
34395 pos--;
34397 if (vecload_pos >= 0)
34399 /* Didn't find a vector to pair with but did find a vecload,
34400 move it to the end of the ready list. */
34401 tmp = ready[vecload_pos];
34402 for (i = vecload_pos; i < lastpos; i++)
34403 ready[i] = ready[i + 1];
34404 ready[lastpos] = tmp;
34405 vec_pairing = 1;
34406 return cached_can_issue_more;
34410 else if (is_power9_pairable_vec_type (type))
34412 /* Issued a vector operation. */
34413 if (vec_pairing == 0)
34415 int vec_pos = -1;
34416 /* We issued a single vector insn, look for a vecload to pair it
34417 with. If one isn't found, try to pair another vector. */
34418 pos = lastpos;
34419 while (pos >= 0)
34421 if (recog_memoized (ready[pos]) >= 0)
34423 type2 = get_attr_type (ready[pos]);
34424 if (type2 == TYPE_VECLOAD)
34426 /* Found a vecload insn to pair with, move it to the
34427 end of the ready list so it is scheduled next. */
34428 tmp = ready[pos];
34429 for (i = pos; i < lastpos; i++)
34430 ready[i] = ready[i + 1];
34431 ready[lastpos] = tmp;
34432 vec_pairing = 1;
34433 return cached_can_issue_more;
34435 else if (is_power9_pairable_vec_type (type2)
34436 && vec_pos == -1)
34437 /* Remember position of first vector insn seen. */
34438 vec_pos = pos;
34440 pos--;
34442 if (vec_pos >= 0)
34444 /* Didn't find a vecload to pair with but did find a vector
34445 insn, move it to the end of the ready list. */
34446 tmp = ready[vec_pos];
34447 for (i = vec_pos; i < lastpos; i++)
34448 ready[i] = ready[i + 1];
34449 ready[lastpos] = tmp;
34450 vec_pairing = 1;
34451 return cached_can_issue_more;
34456 /* We've either finished a vec/vecload pair, couldn't find an insn to
34457 continue the current pair, or the last insn had nothing to do
34458 with pairing. In any case, reset the state. */
34459 vec_pairing = 0;
34462 return cached_can_issue_more;
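#if 0
/* Illustrative sketch, not part of the original file: the rotation used
   repeatedly above to move the chosen insn to the end of the ready list
   (the scheduler issues from the end).  Invented names; void * stands
   in for rtx_insn *.  */
static void
move_to_end (void **ready, int pos, int lastpos)
{
  void *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
#endif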
34465 /* We are about to begin issuing insns for this clock cycle. */
34467 static int
34468 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
34469 rtx_insn **ready ATTRIBUTE_UNUSED,
34470 int *pn_ready ATTRIBUTE_UNUSED,
34471 int clock_var ATTRIBUTE_UNUSED)
34473 int n_ready = *pn_ready;
34475 if (sched_verbose)
34476 fprintf (dump, "// rs6000_sched_reorder :\n");
34478 /* Reorder the ready list, if the second to last ready insn
34479 is a nonpipeline insn. */
34480 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
34482 if (is_nonpipeline_insn (ready[n_ready - 1])
34483 && (recog_memoized (ready[n_ready - 2]) > 0))
34484 /* Simply swap first two insns. */
34485 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
34488 if (rs6000_cpu == PROCESSOR_POWER6)
34489 load_store_pendulum = 0;
34491 return rs6000_issue_rate ();
34494 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34496 static int
34497 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34498 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34500 if (sched_verbose)
34501 fprintf (dump, "// rs6000_sched_reorder2 :\n");
34503 /* For Power6, we need to handle some special cases to try and keep the
34504 store queue from overflowing and triggering expensive flushes.
34506 This code monitors how load and store instructions are being issued
34507 and skews the ready list one way or the other to increase the likelihood
34508 that a desired instruction is issued at the proper time.
34510 A couple of things are done. First, we maintain a "load_store_pendulum"
34511 to track the current state of load/store issue.
34513 - If the pendulum is at zero, then no loads or stores have been
34514 issued in the current cycle so we do nothing.
34516 - If the pendulum is 1, then a single load has been issued in this
34517 cycle and we attempt to locate another load in the ready list to
34518 issue with it.
34520 - If the pendulum is -2, then two stores have already been
34521 issued in this cycle, so we increase the priority of the first load
34522 in the ready list to increase its likelihood of being chosen first
34523 in the next cycle.
34525 - If the pendulum is -1, then a single store has been issued in this
34526 cycle and we attempt to locate another store in the ready list to
34527 issue with it, preferring a store to an adjacent memory location to
34528 facilitate store pairing in the store queue.
34530 - If the pendulum is 2, then two loads have already been
34531 issued in this cycle, so we increase the priority of the first store
34532 in the ready list to increase its likelihood of being chosen first
34533 in the next cycle.
34535 - If the pendulum is < -2 or > 2, then do nothing.
34537 Note: This code covers the most common scenarios. There exist
34538 non-load/store instructions which make use of the LSU and which
34539 would need to be accounted for to strictly model the behavior
34540 of the machine. Those instructions are currently unaccounted
34541 for, to help minimize the compile-time overhead of this code. A standalone sketch of the pendulum update follows this function.
34543 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34545 int pos;
34546 int i;
34547 rtx_insn *tmp;
34548 rtx load_mem, str_mem;
34550 if (is_store_insn (last_scheduled_insn, &str_mem))
34551 /* Issuing a store, swing the load_store_pendulum to the left */
34552 load_store_pendulum--;
34553 else if (is_load_insn (last_scheduled_insn, &load_mem))
34554 /* Issuing a load, swing the load_store_pendulum to the right */
34555 load_store_pendulum++;
34556 else
34557 return cached_can_issue_more;
34559 /* If the pendulum is balanced, or there is only one instruction on
34560 the ready list, then all is well, so return. */
34561 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34562 return cached_can_issue_more;
34564 if (load_store_pendulum == 1)
34566 /* A load has been issued in this cycle. Scan the ready list
34567 for another load to issue with it */
34568 pos = *pn_ready-1;
34570 while (pos >= 0)
34572 if (is_load_insn (ready[pos], &load_mem))
34574 /* Found a load. Move it to the head of the ready list,
34575 and adjust its priority so that it is more likely to
34576 stay there */
34577 tmp = ready[pos];
34578 for (i=pos; i<*pn_ready-1; i++)
34579 ready[i] = ready[i + 1];
34580 ready[*pn_ready-1] = tmp;
34582 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34583 INSN_PRIORITY (tmp)++;
34584 break;
34586 pos--;
34589 else if (load_store_pendulum == -2)
34591 /* Two stores have been issued in this cycle. Increase the
34592 priority of the first load in the ready list to favor it for
34593 issuing in the next cycle. */
34594 pos = *pn_ready-1;
34596 while (pos >= 0)
34598 if (is_load_insn (ready[pos], &load_mem)
34599 && !sel_sched_p ()
34600 && INSN_PRIORITY_KNOWN (ready[pos]))
34602 INSN_PRIORITY (ready[pos])++;
34604 /* Adjust the pendulum to account for the fact that a load
34605 was found and increased in priority. This is to prevent
34606 increasing the priority of multiple loads */
34607 load_store_pendulum--;
34609 break;
34611 pos--;
34614 else if (load_store_pendulum == -1)
34616 /* A store has been issued in this cycle. Scan the ready list for
34617 another store to issue with it, preferring a store to an adjacent
34618 memory location */
34619 int first_store_pos = -1;
34621 pos = *pn_ready-1;
34623 while (pos >= 0)
34625 if (is_store_insn (ready[pos], &str_mem))
34627 rtx str_mem2;
34628 /* Maintain the index of the first store found on the
34629 list */
34630 if (first_store_pos == -1)
34631 first_store_pos = pos;
34633 if (is_store_insn (last_scheduled_insn, &str_mem2)
34634 && adjacent_mem_locations (str_mem, str_mem2))
34636 /* Found an adjacent store. Move it to the head of the
34637 ready list, and adjust its priority so that it is
34638 more likely to stay there */
34639 tmp = ready[pos];
34640 for (i=pos; i<*pn_ready-1; i++)
34641 ready[i] = ready[i + 1];
34642 ready[*pn_ready-1] = tmp;
34644 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34645 INSN_PRIORITY (tmp)++;
34647 first_store_pos = -1;
34649 break;
34652 pos--;
34655 if (first_store_pos >= 0)
34657 /* An adjacent store wasn't found, but a non-adjacent store was,
34658 so move the non-adjacent store to the front of the ready
34659 list, and adjust its priority so that it is more likely to
34660 stay there. */
34661 tmp = ready[first_store_pos];
34662 for (i=first_store_pos; i<*pn_ready-1; i++)
34663 ready[i] = ready[i + 1];
34664 ready[*pn_ready-1] = tmp;
34665 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34666 INSN_PRIORITY (tmp)++;
34669 else if (load_store_pendulum == 2)
34671 /* Two loads have been issued in this cycle. Increase the priority
34672 of the first store in the ready list to favor it for issuing in
34673 the next cycle. */
34674 pos = *pn_ready-1;
34676 while (pos >= 0)
34678 if (is_store_insn (ready[pos], &str_mem)
34679 && !sel_sched_p ()
34680 && INSN_PRIORITY_KNOWN (ready[pos]))
34682 INSN_PRIORITY (ready[pos])++;
34684 /* Adjust the pendulum to account for the fact that a store
34685 was found and increased in priority. This is to prevent
34686 increasing the priority of multiple stores */
34687 load_store_pendulum++;
34689 break;
34691 pos--;
34696 /* Do Power9 dependent reordering if necessary. */
34697 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34698 && recog_memoized (last_scheduled_insn) >= 0)
34699 return power9_sched_reorder2 (ready, *pn_ready - 1);
34701 return cached_can_issue_more;
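/* A minimal sketch of the pendulum update described in the comment above,
   isolated as a pure function (illustrative only: pendulum_next_sketch is
   a hypothetical name and the scheduler does not call it).  */

static inline int ATTRIBUTE_UNUSED
pendulum_next_sketch (int pendulum, bool is_load, bool is_store)
{
  if (is_store)
    return pendulum - 1;	/* Store issued: swing left.  */
  if (is_load)
    return pendulum + 1;	/* Load issued: swing right.  */
  return pendulum;		/* No LSU traffic from this insn.  */
}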
34704 /* Return whether the presence of INSN causes a dispatch group termination
34705 of group WHICH_GROUP.
34707 If WHICH_GROUP == current_group, this function will return true if INSN
34708 causes the termination of the current group (i.e., the dispatch group to
34709 which INSN belongs). This means that INSN will be the last insn in the
34710 group it belongs to.
34712 If WHICH_GROUP == previous_group, this function will return true if INSN
34713 causes the termination of the previous group (i.e., the dispatch group that
34714 precedes the group to which INSN belongs). This means that INSN will be
34715 the first insn in the group it belongs to. */
34717 static bool
34718 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34720 bool first, last;
34722 if (! insn)
34723 return false;
34725 first = insn_must_be_first_in_group (insn);
34726 last = insn_must_be_last_in_group (insn);
34728 if (first && last)
34729 return true;
34731 if (which_group == current_group)
34732 return last;
34733 else if (which_group == previous_group)
34734 return first;
34736 return false;
34740 static bool
34741 insn_must_be_first_in_group (rtx_insn *insn)
34743 enum attr_type type;
34745 if (!insn
34746 || NOTE_P (insn)
34747 || DEBUG_INSN_P (insn)
34748 || GET_CODE (PATTERN (insn)) == USE
34749 || GET_CODE (PATTERN (insn)) == CLOBBER)
34750 return false;
34752 switch (rs6000_cpu)
34754 case PROCESSOR_POWER5:
34755 if (is_cracked_insn (insn))
34756 return true;
34757 /* FALLTHRU */
34758 case PROCESSOR_POWER4:
34759 if (is_microcoded_insn (insn))
34760 return true;
34762 if (!rs6000_sched_groups)
34763 return false;
34765 type = get_attr_type (insn);
34767 switch (type)
34769 case TYPE_MFCR:
34770 case TYPE_MFCRF:
34771 case TYPE_MTCR:
34772 case TYPE_DELAYED_CR:
34773 case TYPE_CR_LOGICAL:
34774 case TYPE_MTJMPR:
34775 case TYPE_MFJMPR:
34776 case TYPE_DIV:
34777 case TYPE_LOAD_L:
34778 case TYPE_STORE_C:
34779 case TYPE_ISYNC:
34780 case TYPE_SYNC:
34781 return true;
34782 default:
34783 break;
34785 break;
34786 case PROCESSOR_POWER6:
34787 type = get_attr_type (insn);
34789 switch (type)
34791 case TYPE_EXTS:
34792 case TYPE_CNTLZ:
34793 case TYPE_TRAP:
34794 case TYPE_MUL:
34795 case TYPE_INSERT:
34796 case TYPE_FPCOMPARE:
34797 case TYPE_MFCR:
34798 case TYPE_MTCR:
34799 case TYPE_MFJMPR:
34800 case TYPE_MTJMPR:
34801 case TYPE_ISYNC:
34802 case TYPE_SYNC:
34803 case TYPE_LOAD_L:
34804 case TYPE_STORE_C:
34805 return true;
34806 case TYPE_SHIFT:
34807 if (get_attr_dot (insn) == DOT_NO
34808 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34809 return true;
34810 else
34811 break;
34812 case TYPE_DIV:
34813 if (get_attr_size (insn) == SIZE_32)
34814 return true;
34815 else
34816 break;
34817 case TYPE_LOAD:
34818 case TYPE_STORE:
34819 case TYPE_FPLOAD:
34820 case TYPE_FPSTORE:
34821 if (get_attr_update (insn) == UPDATE_YES)
34822 return true;
34823 else
34824 break;
34825 default:
34826 break;
34828 break;
34829 case PROCESSOR_POWER7:
34830 type = get_attr_type (insn);
34832 switch (type)
34834 case TYPE_CR_LOGICAL:
34835 case TYPE_MFCR:
34836 case TYPE_MFCRF:
34837 case TYPE_MTCR:
34838 case TYPE_DIV:
34839 case TYPE_ISYNC:
34840 case TYPE_LOAD_L:
34841 case TYPE_STORE_C:
34842 case TYPE_MFJMPR:
34843 case TYPE_MTJMPR:
34844 return true;
34845 case TYPE_MUL:
34846 case TYPE_SHIFT:
34847 case TYPE_EXTS:
34848 if (get_attr_dot (insn) == DOT_YES)
34849 return true;
34850 else
34851 break;
34852 case TYPE_LOAD:
34853 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34854 || get_attr_update (insn) == UPDATE_YES)
34855 return true;
34856 else
34857 break;
34858 case TYPE_STORE:
34859 case TYPE_FPLOAD:
34860 case TYPE_FPSTORE:
34861 if (get_attr_update (insn) == UPDATE_YES)
34862 return true;
34863 else
34864 break;
34865 default:
34866 break;
34868 break;
34869 case PROCESSOR_POWER8:
34870 type = get_attr_type (insn);
34872 switch (type)
34874 case TYPE_CR_LOGICAL:
34875 case TYPE_DELAYED_CR:
34876 case TYPE_MFCR:
34877 case TYPE_MFCRF:
34878 case TYPE_MTCR:
34879 case TYPE_SYNC:
34880 case TYPE_ISYNC:
34881 case TYPE_LOAD_L:
34882 case TYPE_STORE_C:
34883 case TYPE_VECSTORE:
34884 case TYPE_MFJMPR:
34885 case TYPE_MTJMPR:
34886 return true;
34887 case TYPE_SHIFT:
34888 case TYPE_EXTS:
34889 case TYPE_MUL:
34890 if (get_attr_dot (insn) == DOT_YES)
34891 return true;
34892 else
34893 break;
34894 case TYPE_LOAD:
34895 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34896 || get_attr_update (insn) == UPDATE_YES)
34897 return true;
34898 else
34899 break;
34900 case TYPE_STORE:
34901 if (get_attr_update (insn) == UPDATE_YES
34902 && get_attr_indexed (insn) == INDEXED_YES)
34903 return true;
34904 else
34905 break;
34906 default:
34907 break;
34909 break;
34910 default:
34911 break;
34914 return false;
34917 static bool
34918 insn_must_be_last_in_group (rtx_insn *insn)
34920 enum attr_type type;
34922 if (!insn
34923 || NOTE_P (insn)
34924 || DEBUG_INSN_P (insn)
34925 || GET_CODE (PATTERN (insn)) == USE
34926 || GET_CODE (PATTERN (insn)) == CLOBBER)
34927 return false;
34929 switch (rs6000_cpu) {
34930 case PROCESSOR_POWER4:
34931 case PROCESSOR_POWER5:
34932 if (is_microcoded_insn (insn))
34933 return true;
34935 if (is_branch_slot_insn (insn))
34936 return true;
34938 break;
34939 case PROCESSOR_POWER6:
34940 type = get_attr_type (insn);
34942 switch (type)
34944 case TYPE_EXTS:
34945 case TYPE_CNTLZ:
34946 case TYPE_TRAP:
34947 case TYPE_MUL:
34948 case TYPE_FPCOMPARE:
34949 case TYPE_MFCR:
34950 case TYPE_MTCR:
34951 case TYPE_MFJMPR:
34952 case TYPE_MTJMPR:
34953 case TYPE_ISYNC:
34954 case TYPE_SYNC:
34955 case TYPE_LOAD_L:
34956 case TYPE_STORE_C:
34957 return true;
34958 case TYPE_SHIFT:
34959 if (get_attr_dot (insn) == DOT_NO
34960 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34961 return true;
34962 else
34963 break;
34964 case TYPE_DIV:
34965 if (get_attr_size (insn) == SIZE_32)
34966 return true;
34967 else
34968 break;
34969 default:
34970 break;
34972 break;
34973 case PROCESSOR_POWER7:
34974 type = get_attr_type (insn);
34976 switch (type)
34978 case TYPE_ISYNC:
34979 case TYPE_SYNC:
34980 case TYPE_LOAD_L:
34981 case TYPE_STORE_C:
34982 return true;
34983 case TYPE_LOAD:
34984 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34985 && get_attr_update (insn) == UPDATE_YES)
34986 return true;
34987 else
34988 break;
34989 case TYPE_STORE:
34990 if (get_attr_update (insn) == UPDATE_YES
34991 && get_attr_indexed (insn) == INDEXED_YES)
34992 return true;
34993 else
34994 break;
34995 default:
34996 break;
34998 break;
34999 case PROCESSOR_POWER8:
35000 type = get_attr_type (insn);
35002 switch (type)
35004 case TYPE_MFCR:
35005 case TYPE_MTCR:
35006 case TYPE_ISYNC:
35007 case TYPE_SYNC:
35008 case TYPE_LOAD_L:
35009 case TYPE_STORE_C:
35010 return true;
35011 case TYPE_LOAD:
35012 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
35013 && get_attr_update (insn) == UPDATE_YES)
35014 return true;
35015 else
35016 break;
35017 case TYPE_STORE:
35018 if (get_attr_update (insn) == UPDATE_YES
35019 && get_attr_indexed (insn) == INDEXED_YES)
35020 return true;
35021 else
35022 break;
35023 default:
35024 break;
35026 break;
35027 default:
35028 break;
35031 return false;
35034 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
35035 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
35037 static bool
35038 is_costly_group (rtx *group_insns, rtx next_insn)
35040 int i;
35041 int issue_rate = rs6000_issue_rate ();
35043 for (i = 0; i < issue_rate; i++)
35045 sd_iterator_def sd_it;
35046 dep_t dep;
35047 rtx insn = group_insns[i];
35049 if (!insn)
35050 continue;
35052 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
35054 rtx next = DEP_CON (dep);
35056 if (next == next_insn
35057 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
35058 return true;
35062 return false;
35065 /* Utility of the function redefine_groups.
35066 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
35067 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
35068 to keep it "far" (in a separate group) from GROUP_INSNS, following
35069 one of the following schemes, depending on the value of the flag
35070 -minsert-sched-nops = X:
35071 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
35072 in order to force NEXT_INSN into a separate group.
35073 (2) X < sched_finish_regroup_exact: insert exactly X nops.
35074 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
35075 insertion (has a group just ended, how many vacant issue slots remain in the
35076 last group, and how many dispatch groups were encountered so far). */
35078 static int
35079 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
35080 rtx_insn *next_insn, bool *group_end, int can_issue_more,
35081 int *group_count)
35083 rtx nop;
35084 bool force;
35085 int issue_rate = rs6000_issue_rate ();
35086 bool end = *group_end;
35087 int i;
35089 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
35090 return can_issue_more;
35092 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
35093 return can_issue_more;
35095 force = is_costly_group (group_insns, next_insn);
35096 if (!force)
35097 return can_issue_more;
35099 if (sched_verbose > 6)
35100 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
35101 *group_count, can_issue_more);
35103 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
35105 if (*group_end)
35106 can_issue_more = 0;
35108 /* Since only a branch can be issued in the last issue_slot, it is
35109 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
35110 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
35111 in this case the last nop will start a new group and the branch
35112 will be forced to the new group. */
35113 if (can_issue_more && !is_branch_slot_insn (next_insn))
35114 can_issue_more--;
35116 /* Do we have a special group ending nop? */
35117 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
35118 || rs6000_cpu_attr == CPU_POWER8)
35120 nop = gen_group_ending_nop ();
35121 emit_insn_before (nop, next_insn);
35122 can_issue_more = 0;
35124 else
35125 while (can_issue_more > 0)
35127 nop = gen_nop ();
35128 emit_insn_before (nop, next_insn);
35129 can_issue_more--;
35132 *group_end = true;
35133 return 0;
35136 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
35138 int n_nops = rs6000_sched_insert_nops;
35140 /* Nops can't be issued from the branch slot, so the effective
35141 issue_rate for nops is 'issue_rate - 1'. */
35142 if (can_issue_more == 0)
35143 can_issue_more = issue_rate;
35144 can_issue_more--;
35145 if (can_issue_more == 0)
35147 can_issue_more = issue_rate - 1;
35148 (*group_count)++;
35149 end = true;
35150 for (i = 0; i < issue_rate; i++)
35152 group_insns[i] = 0;
35156 while (n_nops > 0)
35158 nop = gen_nop ();
35159 emit_insn_before (nop, next_insn);
35160 if (can_issue_more == issue_rate - 1) /* new group begins */
35161 end = false;
35162 can_issue_more--;
35163 if (can_issue_more == 0)
35165 can_issue_more = issue_rate - 1;
35166 (*group_count)++;
35167 end = true;
35168 for (i = 0; i < issue_rate; i++)
35170 group_insns[i] = 0;
35173 n_nops--;
35176 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35177 can_issue_more++;
35179 /* Is next_insn going to start a new group? */
35180 *group_end
35181 = (end
35182 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35183 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35184 || (can_issue_more < issue_rate &&
35185 insn_terminates_group_p (next_insn, previous_group)));
35186 if (*group_end && end)
35187 (*group_count)--;
35189 if (sched_verbose > 6)
35190 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
35191 *group_count, can_issue_more);
35192 return can_issue_more;
35195 return can_issue_more;
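/* Worked example for scheme (1) above (illustrative): entering with
   issue_rate == 4, can_issue_more == 3, *GROUP_END false, and a non-branch
   NEXT_INSN, on a processor without a group-ending nop, two plain nops are
   emitted - the branch slot need not be filled - and NEXT_INSN is forced
   into the next dispatch group.  */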
35198 /* This function tries to synchronize the dispatch groups that the compiler "sees"
35199 with the dispatch groups that the processor dispatcher is expected to
35200 form in practice. It tries to achieve this synchronization by forcing the
35201 estimated processor grouping on the compiler (as opposed to the function
35202 'pad_groups' which tries to force the scheduler's grouping on the processor).
35204 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35205 examines the (estimated) dispatch groups that will be formed by the processor
35206 dispatcher. It marks these group boundaries to reflect the estimated
35207 processor grouping, overriding the grouping that the scheduler had marked.
35208 Depending on the value of the flag '-minsert-sched-nops' this function can
35209 force certain insns into separate groups or force a certain distance between
35210 them by inserting nops, for example, if there exists a "costly dependence"
35211 between the insns.
35213 The function estimates the group boundaries that the processor will form as
35214 follows: It keeps track of how many vacant issue slots are available after
35215 each insn. A subsequent insn will start a new group if one of the following
35216 4 cases applies:
35217 - no more vacant issue slots remain in the current dispatch group.
35218 - only the last issue slot, which is the branch slot, is vacant, but the next
35219 insn is not a branch.
35220 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
35221 which means that a cracked insn (which occupies two issue slots) can't be
35222 issued in this group.
35223 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
35224 start a new group. */
35226 static int
35227 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35228 rtx_insn *tail)
35230 rtx_insn *insn, *next_insn;
35231 int issue_rate;
35232 int can_issue_more;
35233 int slot, i;
35234 bool group_end;
35235 int group_count = 0;
35236 rtx *group_insns;
35238 /* Initialize. */
35239 issue_rate = rs6000_issue_rate ();
35240 group_insns = XALLOCAVEC (rtx, issue_rate);
35241 for (i = 0; i < issue_rate; i++)
35243 group_insns[i] = 0;
35245 can_issue_more = issue_rate;
35246 slot = 0;
35247 insn = get_next_active_insn (prev_head_insn, tail);
35248 group_end = false;
35250 while (insn != NULL_RTX)
35252 slot = (issue_rate - can_issue_more);
35253 group_insns[slot] = insn;
35254 can_issue_more =
35255 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35256 if (insn_terminates_group_p (insn, current_group))
35257 can_issue_more = 0;
35259 next_insn = get_next_active_insn (insn, tail);
35260 if (next_insn == NULL_RTX)
35261 return group_count + 1;
35263 /* Is next_insn going to start a new group? */
35264 group_end
35265 = (can_issue_more == 0
35266 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35267 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35268 || (can_issue_more < issue_rate &&
35269 insn_terminates_group_p (next_insn, previous_group)));
35271 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
35272 next_insn, &group_end, can_issue_more,
35273 &group_count);
35275 if (group_end)
35277 group_count++;
35278 can_issue_more = 0;
35279 for (i = 0; i < issue_rate; i++)
35281 group_insns[i] = 0;
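/* The scheduler marks the first insn of each dispatch group by giving it
   TImode (pad_groups reads the same mark via GET_MODE); update the mark
   on next_insn so it matches the grouping just computed.  */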
35285 if (GET_MODE (next_insn) == TImode && can_issue_more)
35286 PUT_MODE (next_insn, VOIDmode);
35287 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
35288 PUT_MODE (next_insn, TImode);
35290 insn = next_insn;
35291 if (can_issue_more == 0)
35292 can_issue_more = issue_rate;
35293 } /* while */
35295 return group_count;
35298 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35299 dispatch group boundaries that the scheduler had marked. Pad with nops
35300 any dispatch groups which have vacant issue slots, in order to force the
35301 scheduler's grouping on the processor dispatcher. The function
35302 returns the number of dispatch groups found. */
35304 static int
35305 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35306 rtx_insn *tail)
35308 rtx_insn *insn, *next_insn;
35309 rtx nop;
35310 int issue_rate;
35311 int can_issue_more;
35312 int group_end;
35313 int group_count = 0;
35315 /* Initialize issue_rate. */
35316 issue_rate = rs6000_issue_rate ();
35317 can_issue_more = issue_rate;
35319 insn = get_next_active_insn (prev_head_insn, tail);
35320 next_insn = get_next_active_insn (insn, tail);
35322 while (insn != NULL_RTX)
35324 can_issue_more =
35325 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35327 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
35329 if (next_insn == NULL_RTX)
35330 break;
35332 if (group_end)
35334 /* If the scheduler had marked group termination at this location
35335 (between insn and next_insn), and neither insn nor next_insn will
35336 force group termination, pad the group with nops to force group
35337 termination. */
35338 if (can_issue_more
35339 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
35340 && !insn_terminates_group_p (insn, current_group)
35341 && !insn_terminates_group_p (next_insn, previous_group))
35343 if (!is_branch_slot_insn (next_insn))
35344 can_issue_more--;
35346 while (can_issue_more)
35348 nop = gen_nop ();
35349 emit_insn_before (nop, next_insn);
35350 can_issue_more--;
35354 can_issue_more = issue_rate;
35355 group_count++;
35358 insn = next_insn;
35359 next_insn = get_next_active_insn (insn, tail);
35362 return group_count;
35365 /* We're beginning a new block. Initialize data structures as necessary. */
35367 static void
35368 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
35369 int sched_verbose ATTRIBUTE_UNUSED,
35370 int max_ready ATTRIBUTE_UNUSED)
35372 last_scheduled_insn = NULL;
35373 load_store_pendulum = 0;
35374 divide_cnt = 0;
35375 vec_pairing = 0;
35378 /* The following function is called at the end of scheduling BB.
35379 After reload, it inserts nops to enforce insn group bundling. */
35381 static void
35382 rs6000_sched_finish (FILE *dump, int sched_verbose)
35384 int n_groups;
35386 if (sched_verbose)
35387 fprintf (dump, "=== Finishing schedule.\n");
35389 if (reload_completed && rs6000_sched_groups)
35392 /* Do not run the sched_finish hook when selective scheduling is enabled. */
35392 if (sel_sched_p ())
35393 return;
35395 if (rs6000_sched_insert_nops == sched_finish_none)
35396 return;
35398 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
35399 n_groups = pad_groups (dump, sched_verbose,
35400 current_sched_info->prev_head,
35401 current_sched_info->next_tail);
35402 else
35403 n_groups = redefine_groups (dump, sched_verbose,
35404 current_sched_info->prev_head,
35405 current_sched_info->next_tail);
35407 if (sched_verbose >= 6)
35409 fprintf (dump, "ngroups = %d\n", n_groups);
35410 print_rtl (dump, current_sched_info->prev_head);
35411 fprintf (dump, "Done finish_sched\n");
35416 struct rs6000_sched_context
35418 short cached_can_issue_more;
35419 rtx_insn *last_scheduled_insn;
35420 int load_store_pendulum;
35421 int divide_cnt;
35422 int vec_pairing;
35425 typedef struct rs6000_sched_context rs6000_sched_context_def;
35426 typedef rs6000_sched_context_def *rs6000_sched_context_t;
35428 /* Allocate storage for a new scheduling context. */
35429 static void *
35430 rs6000_alloc_sched_context (void)
35432 return xmalloc (sizeof (rs6000_sched_context_def));
35435 /* If CLEAN_P is true then initialize _SC with clean data;
35436 otherwise initialize it from the global context. */
35437 static void
35438 rs6000_init_sched_context (void *_sc, bool clean_p)
35440 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35442 if (clean_p)
35444 sc->cached_can_issue_more = 0;
35445 sc->last_scheduled_insn = NULL;
35446 sc->load_store_pendulum = 0;
35447 sc->divide_cnt = 0;
35448 sc->vec_pairing = 0;
35450 else
35452 sc->cached_can_issue_more = cached_can_issue_more;
35453 sc->last_scheduled_insn = last_scheduled_insn;
35454 sc->load_store_pendulum = load_store_pendulum;
35455 sc->divide_cnt = divide_cnt;
35456 sc->vec_pairing = vec_pairing;
35460 /* Sets the global scheduling context to the one pointed to by _SC. */
35461 static void
35462 rs6000_set_sched_context (void *_sc)
35464 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35466 gcc_assert (sc != NULL);
35468 cached_can_issue_more = sc->cached_can_issue_more;
35469 last_scheduled_insn = sc->last_scheduled_insn;
35470 load_store_pendulum = sc->load_store_pendulum;
35471 divide_cnt = sc->divide_cnt;
35472 vec_pairing = sc->vec_pairing;
35475 /* Free _SC. */
35476 static void
35477 rs6000_free_sched_context (void *_sc)
35479 gcc_assert (_sc != NULL);
35481 free (_sc);
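/* These alloc/init/set/free hooks let the selective scheduler save and
   restore the backend scheduling state (pendulum, vector pairing, etc.)
   as it switches between scheduling regions.  */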
35484 static bool
35485 rs6000_sched_can_speculate_insn (rtx_insn *insn)
35487 switch (get_attr_type (insn))
35489 case TYPE_DIV:
35490 case TYPE_SDIV:
35491 case TYPE_DDIV:
35492 case TYPE_VECDIV:
35493 case TYPE_SSQRT:
35494 case TYPE_DSQRT:
35495 return false;
35497 default:
35498 return true;
35502 /* Length in units of the trampoline for entering a nested function. */
35505 rs6000_trampoline_size (void)
35507 int ret = 0;
35509 switch (DEFAULT_ABI)
35511 default:
35512 gcc_unreachable ();
35514 case ABI_AIX:
35515 ret = (TARGET_32BIT) ? 12 : 24;
35516 break;
35518 case ABI_ELFv2:
35519 gcc_assert (!TARGET_32BIT);
35520 ret = 32;
35521 break;
35523 case ABI_DARWIN:
35524 case ABI_V4:
35525 ret = (TARGET_32BIT) ? 40 : 48;
35526 break;
35529 return ret;
35532 /* Emit RTL insns to initialize the variable parts of a trampoline.
35533 FNADDR is an RTX for the address of the function's pure code.
35534 CXT is an RTX for the static chain value for the function. */
35536 static void
35537 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
35539 int regsize = (TARGET_32BIT) ? 4 : 8;
35540 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
35541 rtx ctx_reg = force_reg (Pmode, cxt);
35542 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
35544 switch (DEFAULT_ABI)
35546 default:
35547 gcc_unreachable ();
35549 /* Under AIX, just build the 3-word function descriptor. */
35550 case ABI_AIX:
35552 rtx fnmem, fn_reg, toc_reg;
35554 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
35555 error ("You cannot take the address of a nested function if you use "
35556 "the -mno-pointers-to-nested-functions option.");
35558 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
35559 fn_reg = gen_reg_rtx (Pmode);
35560 toc_reg = gen_reg_rtx (Pmode);
35562 /* Macro to shorten the code expansions below. */
35563 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35565 m_tramp = replace_equiv_address (m_tramp, addr);
35567 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
35568 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
35569 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
35570 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
35571 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
35573 # undef MEM_PLUS
35575 break;
35577 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35578 case ABI_ELFv2:
35579 case ABI_DARWIN:
35580 case ABI_V4:
35581 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
35582 LCT_NORMAL, VOIDmode,
35583 addr, Pmode,
35584 GEN_INT (rs6000_trampoline_size ()), SImode,
35585 fnaddr, Pmode,
35586 ctx_reg, Pmode);
35587 break;
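/* Illustrative only: the AIX trampoline filled in above can be pictured
   as this three-word block, where the word size follows TARGET_32BIT
   (the struct name is hypothetical and nothing uses it).  */

struct aix_trampoline_sketch
{
  void *entry;		/* Code address, copied from the function descriptor.  */
  void *toc;		/* TOC pointer, copied from the descriptor.  */
  void *static_chain;	/* CXT, the static chain value.  */
};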
35592 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35593 identifier as an argument, so the front end shouldn't look it up. */
35595 static bool
35596 rs6000_attribute_takes_identifier_p (const_tree attr_id)
35598 return is_attribute_p ("altivec", attr_id);
35601 /* Handle the "altivec" attribute. The attribute may have
35602 arguments as follows:
35604 __attribute__((altivec(vector__)))
35605 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35606 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35608 and may appear more than once (e.g., 'vector bool char') in a
35609 given declaration. */
35611 static tree
35612 rs6000_handle_altivec_attribute (tree *node,
35613 tree name ATTRIBUTE_UNUSED,
35614 tree args,
35615 int flags ATTRIBUTE_UNUSED,
35616 bool *no_add_attrs)
35618 tree type = *node, result = NULL_TREE;
35619 machine_mode mode;
35620 int unsigned_p;
35621 char altivec_type
35622 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
35623 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
35624 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
35625 : '?');
35627 while (POINTER_TYPE_P (type)
35628 || TREE_CODE (type) == FUNCTION_TYPE
35629 || TREE_CODE (type) == METHOD_TYPE
35630 || TREE_CODE (type) == ARRAY_TYPE)
35631 type = TREE_TYPE (type);
35633 mode = TYPE_MODE (type);
35635 /* Check for invalid AltiVec type qualifiers. */
35636 if (type == long_double_type_node)
35637 error ("use of %<long double%> in AltiVec types is invalid");
35638 else if (type == boolean_type_node)
35639 error ("use of boolean types in AltiVec types is invalid");
35640 else if (TREE_CODE (type) == COMPLEX_TYPE)
35641 error ("use of %<complex%> in AltiVec types is invalid");
35642 else if (DECIMAL_FLOAT_MODE_P (mode))
35643 error ("use of decimal floating point types in AltiVec types is invalid");
35644 else if (!TARGET_VSX)
35646 if (type == long_unsigned_type_node || type == long_integer_type_node)
35648 if (TARGET_64BIT)
35649 error ("use of %<long%> in AltiVec types is invalid for "
35650 "64-bit code without -mvsx");
35651 else if (rs6000_warn_altivec_long)
35652 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35653 "use %<int%>");
35655 else if (type == long_long_unsigned_type_node
35656 || type == long_long_integer_type_node)
35657 error ("use of %<long long%> in AltiVec types is invalid without "
35658 "-mvsx");
35659 else if (type == double_type_node)
35660 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35663 switch (altivec_type)
35665 case 'v':
35666 unsigned_p = TYPE_UNSIGNED (type);
35667 switch (mode)
35669 case E_TImode:
35670 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
35671 break;
35672 case E_DImode:
35673 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
35674 break;
35675 case E_SImode:
35676 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
35677 break;
35678 case E_HImode:
35679 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
35680 break;
35681 case E_QImode:
35682 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
35683 break;
35684 case E_SFmode: result = V4SF_type_node; break;
35685 case E_DFmode: result = V2DF_type_node; break;
35686 /* If the user says 'vector int bool', we may be handed the 'bool'
35687 attribute _before_ the 'vector' attribute, and so select the
35688 proper type in the 'b' case below. */
35689 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
35690 case E_V2DImode: case E_V2DFmode:
35691 result = type;
35692 default: break;
35694 break;
35695 case 'b':
35696 switch (mode)
35698 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
35699 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
35700 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
35701 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
35702 default: break;
35704 break;
35705 case 'p':
35706 switch (mode)
35708 case E_V8HImode: result = pixel_V8HI_type_node;
35709 default: break;
35711 default: break;
35714 /* Propagate qualifiers attached to the element type
35715 onto the vector type. */
35716 if (result && result != type && TYPE_QUALS (type))
35717 result = build_qualified_type (result, TYPE_QUALS (type));
35719 *no_add_attrs = true; /* No need to hang on to the attribute. */
35721 if (result)
35722 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
35724 return NULL_TREE;
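/* Illustrative usage of the attribute handled above (assumes AltiVec is
   enabled; the typedef name is hypothetical).  This is the form the
   '__vector' keyword expands to, so v4si_sketch below is 'vector int'.  */

typedef int v4si_sketch __attribute__ ((altivec (vector__)));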
35727 /* AltiVec defines several built-in scalar types that serve as vector
35728 elements; we must teach the compiler how to mangle them. */
35730 static const char *
35731 rs6000_mangle_type (const_tree type)
35733 type = TYPE_MAIN_VARIANT (type);
35735 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35736 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35737 return NULL;
35739 if (type == bool_char_type_node) return "U6__boolc";
35740 if (type == bool_short_type_node) return "U6__bools";
35741 if (type == pixel_type_node) return "u7__pixel";
35742 if (type == bool_int_type_node) return "U6__booli";
35743 if (type == bool_long_type_node) return "U6__booll";
35745 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35746 "g" for IBM extended double, no matter whether it is long double (using
35747 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35748 if (TARGET_FLOAT128_TYPE)
35750 if (type == ieee128_float_type_node)
35751 return "U10__float128";
35753 if (type == ibm128_float_type_node)
35754 return "g";
35756 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
35757 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
35760 /* Mangle IBM extended float long double as `g' (__float128) on
35761 powerpc*-linux where long-double-64 previously was the default. */
35762 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
35763 && TARGET_ELF
35764 && TARGET_LONG_DOUBLE_128
35765 && !TARGET_IEEEQUAD)
35766 return "g";
35768 /* For all other types, use normal C++ mangling. */
35769 return NULL;
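/* For example, under the rules above a declaration such as
   void f (__pixel);  mangles as _Z1fu7__pixel.  */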
35772 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35773 struct attribute_spec.handler. */
35775 static tree
35776 rs6000_handle_longcall_attribute (tree *node, tree name,
35777 tree args ATTRIBUTE_UNUSED,
35778 int flags ATTRIBUTE_UNUSED,
35779 bool *no_add_attrs)
35781 if (TREE_CODE (*node) != FUNCTION_TYPE
35782 && TREE_CODE (*node) != FIELD_DECL
35783 && TREE_CODE (*node) != TYPE_DECL)
35785 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35786 name);
35787 *no_add_attrs = true;
35790 return NULL_TREE;
35793 /* Set longcall attributes on all functions declared when
35794 rs6000_default_long_calls is true. */
35795 static void
35796 rs6000_set_default_type_attributes (tree type)
35798 if (rs6000_default_long_calls
35799 && (TREE_CODE (type) == FUNCTION_TYPE
35800 || TREE_CODE (type) == METHOD_TYPE))
35801 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
35802 NULL_TREE,
35803 TYPE_ATTRIBUTES (type));
35805 #if TARGET_MACHO
35806 darwin_set_default_type_attributes (type);
35807 #endif
35810 /* Return a reference suitable for calling a function with the
35811 longcall attribute. */
35814 rs6000_longcall_ref (rtx call_ref)
35816 const char *call_name;
35817 tree node;
35819 if (GET_CODE (call_ref) != SYMBOL_REF)
35820 return call_ref;
35822 /* System V adds '.' to the internal name, so skip past any leading dots. */
35823 call_name = XSTR (call_ref, 0);
35824 if (*call_name == '.')
35826 while (*call_name == '.')
35827 call_name++;
35829 node = get_identifier (call_name);
35830 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
35833 return force_reg (Pmode, call_ref);
35836 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35837 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35838 #endif
35840 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35841 struct attribute_spec.handler. */
35842 static tree
35843 rs6000_handle_struct_attribute (tree *node, tree name,
35844 tree args ATTRIBUTE_UNUSED,
35845 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35847 tree *type = NULL;
35848 if (DECL_P (*node))
35850 if (TREE_CODE (*node) == TYPE_DECL)
35851 type = &TREE_TYPE (*node);
35853 else
35854 type = node;
35856 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
35857 || TREE_CODE (*type) == UNION_TYPE)))
35859 warning (OPT_Wattributes, "%qE attribute ignored", name);
35860 *no_add_attrs = true;
35863 else if ((is_attribute_p ("ms_struct", name)
35864 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35865 || ((is_attribute_p ("gcc_struct", name)
35866 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35868 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35869 name);
35870 *no_add_attrs = true;
35873 return NULL_TREE;
35876 static bool
35877 rs6000_ms_bitfield_layout_p (const_tree record_type)
35879 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
35880 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35881 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
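/* Illustrative usage of the attributes handled above (the struct tag is
   hypothetical): request Microsoft bit-field layout for a single type with
     struct __attribute__ ((ms_struct)) s { char c; int i : 8; };  */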
35884 #ifdef USING_ELFOS_H
35886 /* A get_unnamed_section callback, used for switching to toc_section. */
35888 static void
35889 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35891 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35892 && TARGET_MINIMAL_TOC)
35894 if (!toc_initialized)
35896 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35897 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35898 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35899 fprintf (asm_out_file, "\t.tc ");
35900 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35901 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35902 fprintf (asm_out_file, "\n");
35904 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35905 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35906 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35907 fprintf (asm_out_file, " = .+32768\n");
35908 toc_initialized = 1;
35910 else
35911 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35913 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35915 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35916 if (!toc_initialized)
35918 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35919 toc_initialized = 1;
35922 else
35924 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35925 if (!toc_initialized)
35927 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35928 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35929 fprintf (asm_out_file, " = .+32768\n");
35930 toc_initialized = 1;
35935 /* Implement TARGET_ASM_INIT_SECTIONS. */
35937 static void
35938 rs6000_elf_asm_init_sections (void)
35940 toc_section
35941 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35943 sdata2_section
35944 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35945 SDATA2_SECTION_ASM_OP);
35948 /* Implement TARGET_SELECT_RTX_SECTION. */
35950 static section *
35951 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35952 unsigned HOST_WIDE_INT align)
35954 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35955 return toc_section;
35956 else
35957 return default_elf_select_rtx_section (mode, x, align);
35960 /* For a SYMBOL_REF, set generic flags and then perform some
35961 target-specific processing.
35963 When the AIX ABI is requested on a non-AIX system, replace the
35964 function name with the real name (with a leading .) rather than the
35965 function descriptor name. This saves a lot of overriding code to
35966 read the prefixes. */
35968 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35969 static void
35970 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35972 default_encode_section_info (decl, rtl, first);
35974 if (first
35975 && TREE_CODE (decl) == FUNCTION_DECL
35976 && !TARGET_AIX
35977 && DEFAULT_ABI == ABI_AIX)
35979 rtx sym_ref = XEXP (rtl, 0);
35980 size_t len = strlen (XSTR (sym_ref, 0));
35981 char *str = XALLOCAVEC (char, len + 2);
35982 str[0] = '.';
35983 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35984 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35988 static inline bool
35989 compare_section_name (const char *section, const char *templ)
35991 int len;
35993 len = strlen (templ);
35994 return (strncmp (section, templ, len) == 0
35995 && (section[len] == 0 || section[len] == '.'));
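/* So, for example, compare_section_name (".sdata.foo", ".sdata") is true,
   while compare_section_name (".sdata2", ".sdata") is false.  */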
35998 bool
35999 rs6000_elf_in_small_data_p (const_tree decl)
36001 if (rs6000_sdata == SDATA_NONE)
36002 return false;
36004 /* We want to merge strings, so we never consider them small data. */
36005 if (TREE_CODE (decl) == STRING_CST)
36006 return false;
36008 /* Functions are never in the small data area. */
36009 if (TREE_CODE (decl) == FUNCTION_DECL)
36010 return false;
36012 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
36014 const char *section = DECL_SECTION_NAME (decl);
36015 if (compare_section_name (section, ".sdata")
36016 || compare_section_name (section, ".sdata2")
36017 || compare_section_name (section, ".gnu.linkonce.s")
36018 || compare_section_name (section, ".sbss")
36019 || compare_section_name (section, ".sbss2")
36020 || compare_section_name (section, ".gnu.linkonce.sb")
36021 || strcmp (section, ".PPC.EMB.sdata0") == 0
36022 || strcmp (section, ".PPC.EMB.sbss0") == 0)
36023 return true;
36025 else
36027 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
36029 if (size > 0
36030 && size <= g_switch_value
36031 /* If it's not public, and we're not going to reference it there,
36032 there's no need to put it in the small data section. */
36033 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
36034 return true;
36037 return false;
36040 #endif /* USING_ELFOS_H */
36042 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
36044 static bool
36045 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
36047 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
36050 /* Do not place thread-local symbols refs in the object blocks. */
36052 static bool
36053 rs6000_use_blocks_for_decl_p (const_tree decl)
36055 return !DECL_THREAD_LOCAL_P (decl);
36058 /* Return a REG that occurs in ADDR with coefficient 1.
36059 ADDR can be effectively incremented by incrementing REG.
36061 r0 is special and we must not select it as an address
36062 register by this routine since our caller will try to
36063 increment the returned register via an "la" instruction. */
36066 find_addr_reg (rtx addr)
36068 while (GET_CODE (addr) == PLUS)
36070 if (GET_CODE (XEXP (addr, 0)) == REG
36071 && REGNO (XEXP (addr, 0)) != 0)
36072 addr = XEXP (addr, 0);
36073 else if (GET_CODE (XEXP (addr, 1)) == REG
36074 && REGNO (XEXP (addr, 1)) != 0)
36075 addr = XEXP (addr, 1);
36076 else if (CONSTANT_P (XEXP (addr, 0)))
36077 addr = XEXP (addr, 1);
36078 else if (CONSTANT_P (XEXP (addr, 1)))
36079 addr = XEXP (addr, 0);
36080 else
36081 gcc_unreachable ();
36083 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
36084 return addr;
36087 void
36088 rs6000_fatal_bad_address (rtx op)
36090 fatal_insn ("bad address", op);
36093 #if TARGET_MACHO
36095 typedef struct branch_island_d {
36096 tree function_name;
36097 tree label_name;
36098 int line_number;
36099 } branch_island;
36102 static vec<branch_island, va_gc> *branch_islands;
36104 /* Remember to generate a branch island for far calls to the given
36105 function. */
36107 static void
36108 add_compiler_branch_island (tree label_name, tree function_name,
36109 int line_number)
36111 branch_island bi = {function_name, label_name, line_number};
36112 vec_safe_push (branch_islands, bi);
36115 /* Generate far-jump branch islands for everything recorded in
36116 branch_islands. Invoked immediately after the last instruction of
36117 the epilogue has been emitted; the branch islands must be appended
36118 to, and contiguous with, the function body. Mach-O stubs are
36119 generated in machopic_output_stub(). */
36121 static void
36122 macho_branch_islands (void)
36124 char tmp_buf[512];
36126 while (!vec_safe_is_empty (branch_islands))
36128 branch_island *bi = &branch_islands->last ();
36129 const char *label = IDENTIFIER_POINTER (bi->label_name);
36130 const char *name = IDENTIFIER_POINTER (bi->function_name);
36131 char name_buf[512];
36132 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36133 if (name[0] == '*' || name[0] == '&')
36134 strcpy (name_buf, name+1);
36135 else
36137 name_buf[0] = '_';
36138 strcpy (name_buf+1, name);
36140 strcpy (tmp_buf, "\n");
36141 strcat (tmp_buf, label);
36142 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36143 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36144 dbxout_stabd (N_SLINE, bi->line_number);
36145 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36146 if (flag_pic)
36148 if (TARGET_LINK_STACK)
36150 char name[32];
36151 get_ppc476_thunk_name (name);
36152 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
36153 strcat (tmp_buf, name);
36154 strcat (tmp_buf, "\n");
36155 strcat (tmp_buf, label);
36156 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36158 else
36160 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
36161 strcat (tmp_buf, label);
36162 strcat (tmp_buf, "_pic\n");
36163 strcat (tmp_buf, label);
36164 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36167 strcat (tmp_buf, "\taddis r11,r11,ha16(");
36168 strcat (tmp_buf, name_buf);
36169 strcat (tmp_buf, " - ");
36170 strcat (tmp_buf, label);
36171 strcat (tmp_buf, "_pic)\n");
36173 strcat (tmp_buf, "\tmtlr r0\n");
36175 strcat (tmp_buf, "\taddi r12,r11,lo16(");
36176 strcat (tmp_buf, name_buf);
36177 strcat (tmp_buf, " - ");
36178 strcat (tmp_buf, label);
36179 strcat (tmp_buf, "_pic)\n");
36181 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
36183 else
36185 strcat (tmp_buf, ":\nlis r12,hi16(");
36186 strcat (tmp_buf, name_buf);
36187 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
36188 strcat (tmp_buf, name_buf);
36189 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
36191 output_asm_insn (tmp_buf, 0);
36192 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36193 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36194 dbxout_stabd (N_SLINE, bi->line_number);
36195 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36196 branch_islands->pop ();
36200 /* NO_PREVIOUS_DEF checks in the list of branch islands whether the
36201 function name is already there or not. */
36203 static int
36204 no_previous_def (tree function_name)
36206 branch_island *bi;
36207 unsigned ix;
36209 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36210 if (function_name == bi->function_name)
36211 return 0;
36212 return 1;
36215 /* GET_PREV_LABEL gets the label name from the previous definition of
36216 the function. */
36218 static tree
36219 get_prev_label (tree function_name)
36221 branch_island *bi;
36222 unsigned ix;
36224 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36225 if (function_name == bi->function_name)
36226 return bi->label_name;
36227 return NULL_TREE;
36230 /* INSN is either a function call or a millicode call. It may have an
36231 unconditional jump in its delay slot.
36233 CALL_DEST is the routine we are calling. */
36235 char *
36236 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
36237 int cookie_operand_number)
36239 static char buf[256];
36240 if (darwin_emit_branch_islands
36241 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
36242 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
36244 tree labelname;
36245 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
36247 if (no_previous_def (funname))
36249 rtx label_rtx = gen_label_rtx ();
36250 char *label_buf, temp_buf[256];
36251 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
36252 CODE_LABEL_NUMBER (label_rtx));
36253 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
36254 labelname = get_identifier (label_buf);
36255 add_compiler_branch_island (labelname, funname, insn_line (insn));
36257 else
36258 labelname = get_prev_label (funname);
36260 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36261 instruction will reach 'foo', otherwise link as 'bl L42'".
36262 "L42" should be a 'branch island', that will do a far jump to
36263 'foo'. Branch islands are generated in
36264 macho_branch_islands(). */
36265 sprintf (buf, "jbsr %%z%d,%.246s",
36266 dest_operand_number, IDENTIFIER_POINTER (labelname));
36268 else
36269 sprintf (buf, "bl %%z%d", dest_operand_number);
36270 return buf;
36273 /* Generate PIC and indirect symbol stubs. */
36275 void
36276 machopic_output_stub (FILE *file, const char *symb, const char *stub)
36278 unsigned int length;
36279 char *symbol_name, *lazy_ptr_name;
36280 char *local_label_0;
36281 static int label = 0;
36283 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36284 symb = (*targetm.strip_name_encoding) (symb);
36287 length = strlen (symb);
36288 symbol_name = XALLOCAVEC (char, length + 32);
36289 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
36291 lazy_ptr_name = XALLOCAVEC (char, length + 32);
36292 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
36294 if (flag_pic == 2)
36295 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
36296 else
36297 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
36299 if (flag_pic == 2)
36301 fprintf (file, "\t.align 5\n");
36303 fprintf (file, "%s:\n", stub);
36304 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36306 label++;
36307 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36308 sprintf (local_label_0, "\"L%011d$spb\"", label);
36310 fprintf (file, "\tmflr r0\n");
36311 if (TARGET_LINK_STACK)
36313 char name[32];
36314 get_ppc476_thunk_name (name);
36315 fprintf (file, "\tbl %s\n", name);
36316 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36318 else
36320 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
36321 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36323 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
36324 lazy_ptr_name, local_label_0);
36325 fprintf (file, "\tmtlr r0\n");
36326 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
36327 (TARGET_64BIT ? "ldu" : "lwzu"),
36328 lazy_ptr_name, local_label_0);
36329 fprintf (file, "\tmtctr r12\n");
36330 fprintf (file, "\tbctr\n");
36332 else
36334 fprintf (file, "\t.align 4\n");
36336 fprintf (file, "%s:\n", stub);
36337 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36339 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
36340 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
36341 (TARGET_64BIT ? "ldu" : "lwzu"),
36342 lazy_ptr_name);
36343 fprintf (file, "\tmtctr r12\n");
36344 fprintf (file, "\tbctr\n");
36347 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
36348 fprintf (file, "%s:\n", lazy_ptr_name);
36349 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36350 fprintf (file, "%sdyld_stub_binding_helper\n",
36351 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
36354 /* Legitimize PIC addresses. If the address is already
36355 position-independent, we return ORIG. Newly generated
36356 position-independent addresses go into a reg. This is REG if
36357 nonzero, otherwise we allocate register(s) as necessary. */
36359 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
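/* The bias by 0x8000 above folds the signed range check
   -0x8000 <= INTVAL (X) < 0x8000 into one unsigned comparison.  */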
36362 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
36363 rtx reg)
36365 rtx base, offset;
36367 if (reg == NULL && ! reload_in_progress && ! reload_completed)
36368 reg = gen_reg_rtx (Pmode);
36370 if (GET_CODE (orig) == CONST)
36372 rtx reg_temp;
36374 if (GET_CODE (XEXP (orig, 0)) == PLUS
36375 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
36376 return orig;
36378 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
36380 /* Use a different reg for the intermediate value, as
36381 it will be marked UNCHANGING. */
36382 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
36383 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
36384 Pmode, reg_temp);
36385 offset =
36386 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
36387 Pmode, reg);
36389 if (GET_CODE (offset) == CONST_INT)
36391 if (SMALL_INT (offset))
36392 return plus_constant (Pmode, base, INTVAL (offset));
36393 else if (! reload_in_progress && ! reload_completed)
36394 offset = force_reg (Pmode, offset);
36395 else
36397 rtx mem = force_const_mem (Pmode, orig);
36398 return machopic_legitimize_pic_address (mem, Pmode, reg);
36401 return gen_rtx_PLUS (Pmode, base, offset);
36404 /* Fall back on generic machopic code. */
36405 return machopic_legitimize_pic_address (orig, mode, reg);
36408 /* Output a .machine directive for the Darwin assembler, and call
36409 the generic start_file routine. */
36411 static void
36412 rs6000_darwin_file_start (void)
36414 static const struct
36416 const char *arg;
36417 const char *name;
36418 HOST_WIDE_INT if_set;
36419 } mapping[] = {
36420 { "ppc64", "ppc64", MASK_64BIT },
36421 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
36422 { "power4", "ppc970", 0 },
36423 { "G5", "ppc970", 0 },
36424 { "7450", "ppc7450", 0 },
36425 { "7400", "ppc7400", MASK_ALTIVEC },
36426 { "G4", "ppc7400", 0 },
36427 { "750", "ppc750", 0 },
36428 { "740", "ppc750", 0 },
36429 { "G3", "ppc750", 0 },
36430 { "604e", "ppc604e", 0 },
36431 { "604", "ppc604", 0 },
36432 { "603e", "ppc603", 0 },
36433 { "603", "ppc603", 0 },
36434 { "601", "ppc601", 0 },
36435 { NULL, "ppc", 0 } };
36436 const char *cpu_id = "";
36437 size_t i;
36439 rs6000_file_start ();
36440 darwin_file_start ();
36442 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36444 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
36445 cpu_id = rs6000_default_cpu;
36447 if (global_options_set.x_rs6000_cpu_index)
36448 cpu_id = processor_target_table[rs6000_cpu_index].name;
36450 /* Look through the mapping array. Pick the first name that either
36451 matches the argument, has a bit set in IF_SET that is also set
36452 in the target flags, or has a NULL name. */
36454 i = 0;
36455 while (mapping[i].arg != NULL
36456 && strcmp (mapping[i].arg, cpu_id) != 0
36457 && (mapping[i].if_set & rs6000_isa_flags) == 0)
36458 i++;
36460 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
36463 #endif /* TARGET_MACHO */
36465 #if TARGET_ELF
36466 static int
36467 rs6000_elf_reloc_rw_mask (void)
36469 if (flag_pic)
36470 return 3;
36471 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
36472 return 2;
36473 else
36474 return 0;
36477 /* Record an element in the table of global constructors. SYMBOL is
36478 a SYMBOL_REF of the function to be called; PRIORITY is a number
36479 between 0 and MAX_INIT_PRIORITY.
36481 This differs from default_named_section_asm_out_constructor in
36482 that we have special handling for -mrelocatable. */
36484 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
36485 static void
36486 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
36488 const char *section = ".ctors";
36489 char buf[18];
36491 if (priority != DEFAULT_INIT_PRIORITY)
36493 sprintf (buf, ".ctors.%.5u",
36494 /* Invert the numbering so the linker puts us in the proper
36495 order; constructors are run from right to left, and the
36496 linker sorts in increasing order. */
36497 MAX_INIT_PRIORITY - priority);
36498 section = buf;
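/* E.g., with MAX_INIT_PRIORITY == 65535, priority 101 becomes
   ".ctors.65434" and priority 65500 becomes ".ctors.00035"; the
   high-numbered section sorts later, and since .ctors entries run
   right to left, the low-numbered priority runs first, as required.  */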
36501 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36502 assemble_align (POINTER_SIZE);
36504 if (DEFAULT_ABI == ABI_V4
36505 && (TARGET_RELOCATABLE || flag_pic > 1))
36507 fputs ("\t.long (", asm_out_file);
36508 output_addr_const (asm_out_file, symbol);
36509 fputs (")@fixup\n", asm_out_file);
36511 else
36512 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36515 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
36516 static void
36517 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
36519 const char *section = ".dtors";
36520 char buf[18];
36522 if (priority != DEFAULT_INIT_PRIORITY)
36524 sprintf (buf, ".dtors.%.5u",
36525 /* Invert the numbering so the linker puts us in the proper
36526 order; destructors are run from left to right, and the
36527 linker sorts in increasing order. */
36528 MAX_INIT_PRIORITY - priority);
36529 section = buf;
36532 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36533 assemble_align (POINTER_SIZE);
36535 if (DEFAULT_ABI == ABI_V4
36536 && (TARGET_RELOCATABLE || flag_pic > 1))
36538 fputs ("\t.long (", asm_out_file);
36539 output_addr_const (asm_out_file, symbol);
36540 fputs (")@fixup\n", asm_out_file);
36542 else
36543 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36546 void
36547 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
36549 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
36551 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
36552 ASM_OUTPUT_LABEL (file, name);
36553 fputs (DOUBLE_INT_ASM_OP, file);
36554 rs6000_output_function_entry (file, name);
36555 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
36556 if (DOT_SYMBOLS)
36558 fputs ("\t.size\t", file);
36559 assemble_name (file, name);
36560 fputs (",24\n\t.type\t.", file);
36561 assemble_name (file, name);
36562 fputs (",@function\n", file);
36563 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
36565 fputs ("\t.globl\t.", file);
36566 assemble_name (file, name);
36567 putc ('\n', file);
36570 else
36571 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36572 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36573 rs6000_output_function_entry (file, name);
36574 fputs (":\n", file);
36575 return;
36578 if (DEFAULT_ABI == ABI_V4
36579 && (TARGET_RELOCATABLE || flag_pic > 1)
36580 && !TARGET_SECURE_PLT
36581 && (!constant_pool_empty_p () || crtl->profile)
36582 && uses_TOC ())
36584 char buf[256];
36586 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36588 fprintf (file, "\t.long ");
36589 assemble_name (file, toc_label_name);
36590 need_toc_init = 1;
36591 putc ('-', file);
36592 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36593 assemble_name (file, buf);
36594 putc ('\n', file);
36597 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36598 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36600 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
36602 char buf[256];
36604 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36606 fprintf (file, "\t.quad .TOC.-");
36607 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36608 assemble_name (file, buf);
36609 putc ('\n', file);
36612 if (DEFAULT_ABI == ABI_AIX)
36614 const char *desc_name, *orig_name;
36616 orig_name = (*targetm.strip_name_encoding) (name);
36617 desc_name = orig_name;
36618 while (*desc_name == '.')
36619 desc_name++;
36621 if (TREE_PUBLIC (decl))
36622 fprintf (file, "\t.globl %s\n", desc_name);
36624 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
36625 fprintf (file, "%s:\n", desc_name);
36626 fprintf (file, "\t.long %s\n", orig_name);
36627 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
36628 fputs ("\t.long 0\n", file);
36629 fprintf (file, "\t.previous\n");
36631 ASM_OUTPUT_LABEL (file, name);
36634 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
36635 static void
36636 rs6000_elf_file_end (void)
36638 #ifdef HAVE_AS_GNU_ATTRIBUTE
36639 /* ??? The value emitted depends on options active at file end.
36640 Assume anyone using #pragma or attributes that might change
36641 options knows what they are doing. */
36642 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
36643 && rs6000_passes_float)
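/* The value written for tag 4 (Tag_GNU_Power_ABI_FP) encodes two
   fields: the low two bits give the scalar float convention
   (1 = double in FPRs, 2 = soft float, 3 = single precision only),
   and the next two bits give the long double format, set via the
   fp |= N * 4 updates below.  */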
36645 int fp;
36647 if (TARGET_DF_FPR | TARGET_DF_SPE)
36648 fp = 1;
36649 else if (TARGET_SF_FPR | TARGET_SF_SPE)
36650 fp = 3;
36651 else
36652 fp = 2;
36653 if (rs6000_passes_long_double)
36655 if (!TARGET_LONG_DOUBLE_128)
36656 fp |= 2 * 4;
36657 else if (TARGET_IEEEQUAD)
36658 fp |= 3 * 4;
36659 else
36660 fp |= 1 * 4;
36662 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
36664 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
36666 if (rs6000_passes_vector)
36667 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
36668 (TARGET_ALTIVEC_ABI ? 2
36669 : TARGET_SPE_ABI ? 3
36670 : 1));
36671 if (rs6000_returns_struct)
36672 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
36673 aix_struct_return ? 2 : 1);
36675 #endif
36676 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36677 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
36678 file_end_indicate_exec_stack ();
36679 #endif
36681 if (flag_split_stack)
36682 file_end_indicate_split_stack ();
36684 if (cpu_builtin_p)
36686 /* We have expanded a CPU builtin, so we need to emit a reference to
36687 the special symbol that libc uses to declare that it supports
36688 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
36689 switch_to_section (data_section);
36690 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
36691 fprintf (asm_out_file, "\t%s %s\n",
36692 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
36695 #endif
36697 #if TARGET_XCOFF
36699 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36700 #define HAVE_XCOFF_DWARF_EXTRAS 0
36701 #endif
36703 static enum unwind_info_type
36704 rs6000_xcoff_debug_unwind_info (void)
36706 return UI_NONE;
36709 static void
36710 rs6000_xcoff_asm_output_anchor (rtx symbol)
36712 char buffer[100];
36714 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
36715 SYMBOL_REF_BLOCK_OFFSET (symbol));
36716 fprintf (asm_out_file, "%s", SET_ASM_OP);
36717 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
36718 fprintf (asm_out_file, ",");
36719 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
36720 fprintf (asm_out_file, "\n");
36723 static void
36724 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
36726 fputs (GLOBAL_ASM_OP, stream);
36727 RS6000_OUTPUT_BASENAME (stream, name);
36728 putc ('\n', stream);
36731 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
36732 points to the section string variable. */
36734 static void
36735 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
36737 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
36738 *(const char *const *) directive,
36739 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36742 /* Likewise for read-write sections. */
36744 static void
36745 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
36747 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
36748 *(const char *const *) directive,
36749 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36752 static void
36753 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
36755 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
36756 *(const char *const *) directive,
36757 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36760 /* A get_unnamed_section callback, used for switching to toc_section. */
36762 static void
36763 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
36765 if (TARGET_MINIMAL_TOC)
36767 /* toc_section is always selected at least once from
36768 rs6000_xcoff_file_start, so this is guaranteed to
36769 always be defined once and only once in each file. */
36770 if (!toc_initialized)
36772 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
36773 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
36774 toc_initialized = 1;
36776 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
36777 (TARGET_32BIT ? "" : ",3"));
36779 else
36780 fputs ("\t.toc\n", asm_out_file);
36783 /* Implement TARGET_ASM_INIT_SECTIONS. */
36785 static void
36786 rs6000_xcoff_asm_init_sections (void)
36788 read_only_data_section
36789 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36790 &xcoff_read_only_section_name);
36792 private_data_section
36793 = get_unnamed_section (SECTION_WRITE,
36794 rs6000_xcoff_output_readwrite_section_asm_op,
36795 &xcoff_private_data_section_name);
36797 tls_data_section
36798 = get_unnamed_section (SECTION_TLS,
36799 rs6000_xcoff_output_tls_section_asm_op,
36800 &xcoff_tls_data_section_name);
36802 tls_private_data_section
36803 = get_unnamed_section (SECTION_TLS,
36804 rs6000_xcoff_output_tls_section_asm_op,
36805 &xcoff_private_data_section_name);
36807 read_only_private_data_section
36808 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36809 &xcoff_private_data_section_name);
36811 toc_section
36812 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
36814 readonly_data_section = read_only_data_section;
36817 static int
36818 rs6000_xcoff_reloc_rw_mask (void)
36820 return 3;
36823 static void
36824 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
36825 tree decl ATTRIBUTE_UNUSED)
36827 int smclass;
36828 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
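/* smclass indexes this table: PR for code, RO for read-only data,
   RW for writable data, TL for thread-local data and XO for excluded
   sections, as selected by the flag tests below.  */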
36830 if (flags & SECTION_EXCLUDE)
36831 smclass = 4;
36832 else if (flags & SECTION_DEBUG)
36834 fprintf (asm_out_file, "\t.dwsect %s\n", name);
36835 return;
36837 else if (flags & SECTION_CODE)
36838 smclass = 0;
36839 else if (flags & SECTION_TLS)
36840 smclass = 3;
36841 else if (flags & SECTION_WRITE)
36842 smclass = 2;
36843 else
36844 smclass = 1;
36846 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
36847 (flags & SECTION_CODE) ? "." : "",
36848 name, suffix[smclass], flags & SECTION_ENTSIZE);
36851 #define IN_NAMED_SECTION(DECL) \
36852 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36853 && DECL_SECTION_NAME (DECL) != NULL)
36855 static section *
36856 rs6000_xcoff_select_section (tree decl, int reloc,
36857 unsigned HOST_WIDE_INT align)
36859 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36860 named section. */
36861 if (align > BIGGEST_ALIGNMENT)
36863 resolve_unique_section (decl, reloc, true);
36864 if (IN_NAMED_SECTION (decl))
36865 return get_named_section (decl, NULL, reloc);
36868 if (decl_readonly_section (decl, reloc))
36870 if (TREE_PUBLIC (decl))
36871 return read_only_data_section;
36872 else
36873 return read_only_private_data_section;
36875 else
36877 #if HAVE_AS_TLS
36878 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36880 if (TREE_PUBLIC (decl))
36881 return tls_data_section;
36882 else if (bss_initializer_p (decl))
36884 /* Convert to COMMON to emit in BSS. */
36885 DECL_COMMON (decl) = 1;
36886 return tls_comm_section;
36888 else
36889 return tls_private_data_section;
36891 else
36892 #endif
36893 if (TREE_PUBLIC (decl))
36894 return data_section;
36895 else
36896 return private_data_section;
36900 static void
36901 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36903 const char *name;
36905 /* Use select_section for private data and uninitialized data with
36906 alignment <= BIGGEST_ALIGNMENT. */
36907 if (!TREE_PUBLIC (decl)
36908 || DECL_COMMON (decl)
36909 || (DECL_INITIAL (decl) == NULL_TREE
36910 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36911 || DECL_INITIAL (decl) == error_mark_node
36912 || (flag_zero_initialized_in_bss
36913 && initializer_zerop (DECL_INITIAL (decl))))
36914 return;
36916 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36917 name = (*targetm.strip_name_encoding) (name);
36918 set_decl_section_name (decl, name);
36921 /* Select section for constant in constant pool.
36923 On RS/6000, all constants are in the private read-only data area.
36924 However, if this is being placed in the TOC it must be output as a
36925 toc entry. */
36927 static section *
36928 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36929 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36931 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36932 return toc_section;
36933 else
36934 return read_only_private_data_section;
36937 /* Remove any trailing [DS] or the like from the symbol name. */
36939 static const char *
36940 rs6000_xcoff_strip_name_encoding (const char *name)
36942 size_t len;
36943 if (*name == '*')
36944 name++;
36945 len = strlen (name);
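/* A trailing mapping class such as "[DS]" or "[RW]" is always four
   characters, so keeping len - 4 characters removes exactly that
   suffix.  */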
36946 if (name[len - 1] == ']')
36947 return ggc_alloc_string (name, len - 4);
36948 else
36949 return name;
36952 /* Section attributes. AIX is always PIC. */
36954 static unsigned int
36955 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36957 unsigned int align;
36958 unsigned int flags = default_section_type_flags (decl, name, reloc);
36960 /* Align to at least UNIT size. */
36961 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36962 align = MIN_UNITS_PER_WORD;
36963 else
36964 /* Increase alignment of large objects if not already stricter. */
36965 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36966 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36967 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
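/* There is no separate alignment flag, so stash log2 of the alignment
   in the SECTION_ENTSIZE bits; rs6000_xcoff_asm_named_section emits it
   as the csect alignment operand.  */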
36969 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
36972 /* Output at beginning of assembler file.
36974 Initialize the section names for the RS/6000 at this point.
36976 Specify filename, including full path, to assembler.
36978 We want to go into the TOC section so at least one .toc will be emitted.
36979 Also, in order to output proper .bs/.es pairs, we need at least one static
36980 [RW] section emitted.
36982 Finally, declare mcount when profiling to make the assembler happy. */
36984 static void
36985 rs6000_xcoff_file_start (void)
36987 rs6000_gen_section_name (&xcoff_bss_section_name,
36988 main_input_filename, ".bss_");
36989 rs6000_gen_section_name (&xcoff_private_data_section_name,
36990 main_input_filename, ".rw_");
36991 rs6000_gen_section_name (&xcoff_read_only_section_name,
36992 main_input_filename, ".ro_");
36993 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36994 main_input_filename, ".tls_");
36995 rs6000_gen_section_name (&xcoff_tbss_section_name,
36996 main_input_filename, ".tbss_[UL]");
36998 fputs ("\t.file\t", asm_out_file);
36999 output_quoted_string (asm_out_file, main_input_filename);
37000 fputc ('\n', asm_out_file);
37001 if (write_symbols != NO_DEBUG)
37002 switch_to_section (private_data_section);
37003 switch_to_section (toc_section);
37004 switch_to_section (text_section);
37005 if (profile_flag)
37006 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
37007 rs6000_file_start ();
37010 /* Output at end of assembler file.
37011 On the RS/6000, referencing data should automatically pull in text. */
37013 static void
37014 rs6000_xcoff_file_end (void)
37016 switch_to_section (text_section);
37017 fputs ("_section_.text:\n", asm_out_file);
37018 switch_to_section (data_section);
37019 fputs (TARGET_32BIT
37020 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
37021 asm_out_file);
37024 struct declare_alias_data
37026 FILE *file;
37027 bool function_descriptor;
37030 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
37032 static bool
37033 rs6000_declare_alias (struct symtab_node *n, void *d)
37035 struct declare_alias_data *data = (struct declare_alias_data *)d;
37036 /* Main symbol is output specially, because varasm machinery does part of
37037 the job for us - we do not need to emit .globl/.lglobl and such. */
37038 if (!n->alias || n->weakref)
37039 return false;
37041 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
37042 return false;
37044 /* Prevent assemble_alias from trying to use .set pseudo operation
37045 that does not behave as expected by the middle-end. */
37046 TREE_ASM_WRITTEN (n->decl) = true;
37048 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
37049 char *buffer = (char *) alloca (strlen (name) + 2);
37050 char *p;
37051 int dollar_inside = 0;
37053 strcpy (buffer, name);
37054 p = strchr (buffer, '$');
37055 while (p) {
37056 *p = '_';
37057 dollar_inside++;
37058 p = strchr (p + 1, '$');
37060 if (TREE_PUBLIC (n->decl))
37062 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
37064 if (dollar_inside) {
37065 if (data->function_descriptor)
37066 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37067 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37069 if (data->function_descriptor)
37071 fputs ("\t.globl .", data->file);
37072 RS6000_OUTPUT_BASENAME (data->file, buffer);
37073 putc ('\n', data->file);
37075 fputs ("\t.globl ", data->file);
37076 RS6000_OUTPUT_BASENAME (data->file, buffer);
37077 putc ('\n', data->file);
37079 #ifdef ASM_WEAKEN_DECL
37080 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
37081 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
37082 #endif
37084 else
37086 if (dollar_inside)
37088 if (data->function_descriptor)
37089 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
37090 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
37092 if (data->function_descriptor)
37094 fputs ("\t.lglobl .", data->file);
37095 RS6000_OUTPUT_BASENAME (data->file, buffer);
37096 putc ('\n', data->file);
37098 fputs ("\t.lglobl ", data->file);
37099 RS6000_OUTPUT_BASENAME (data->file, buffer);
37100 putc ('\n', data->file);
37102 if (data->function_descriptor)
37103 fputs (".", data->file);
37104 RS6000_OUTPUT_BASENAME (data->file, buffer);
37105 fputs (":\n", data->file);
37106 return false;
37110 #ifdef HAVE_GAS_HIDDEN
37111 /* Helper function to calculate visibility of a DECL
37112 and return the value as a const string. */
37114 static const char *
37115 rs6000_xcoff_visibility (tree decl)
37117 static const char * const visibility_types[] = {
37118 "", ",protected", ",hidden", ",internal"
37121 enum symbol_visibility vis = DECL_VISIBILITY (decl);
37123 if (TREE_CODE (decl) == FUNCTION_DECL
37124 && cgraph_node::get (decl)
37125 && cgraph_node::get (decl)->instrumentation_clone
37126 && cgraph_node::get (decl)->instrumented_version)
37127 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
37129 return visibility_types[vis];
37131 #endif
37134 /* This macro produces the initial definition of a function name.
37135 On the RS/6000, we need to place an extra '.' in the function name and
37136 output the function descriptor.
37137 Dollar signs are converted to underscores.
37139 The csect for the function will have already been created when
37140 text_section was selected. We do have to go back to that csect, however.
37142 The third and fourth parameters to the .function pseudo-op (16 and 044)
37143 are placeholders which no longer have any use.
37145 Because AIX assembler's .set command has unexpected semantics, we output
37146 all aliases as alternative labels in front of the definition. */
37148 void
37149 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
37151 char *buffer = (char *) alloca (strlen (name) + 1);
37152 char *p;
37153 int dollar_inside = 0;
37154 struct declare_alias_data data = {file, false};
37156 strcpy (buffer, name);
37157 p = strchr (buffer, '$');
37158 while (p) {
37159 *p = '_';
37160 dollar_inside++;
37161 p = strchr (p + 1, '$');
37163 if (TREE_PUBLIC (decl))
37165 if (!RS6000_WEAK || !DECL_WEAK (decl))
37167 if (dollar_inside) {
37168 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37169 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37171 fputs ("\t.globl .", file);
37172 RS6000_OUTPUT_BASENAME (file, buffer);
37173 #ifdef HAVE_GAS_HIDDEN
37174 fputs (rs6000_xcoff_visibility (decl), file);
37175 #endif
37176 putc ('\n', file);
37179 else
37181 if (dollar_inside) {
37182 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37183 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37185 fputs ("\t.lglobl .", file);
37186 RS6000_OUTPUT_BASENAME (file, buffer);
37187 putc ('\n', file);
37189 fputs ("\t.csect ", file);
37190 RS6000_OUTPUT_BASENAME (file, buffer);
37191 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
37192 RS6000_OUTPUT_BASENAME (file, buffer);
37193 fputs (":\n", file);
37194 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37195 &data, true);
37196 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
37197 RS6000_OUTPUT_BASENAME (file, buffer);
37198 fputs (", TOC[tc0], 0\n", file);
37199 in_section = NULL;
37200 switch_to_section (function_section (decl));
37201 putc ('.', file);
37202 RS6000_OUTPUT_BASENAME (file, buffer);
37203 fputs (":\n", file);
37204 data.function_descriptor = true;
37205 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37206 &data, true);
37207 if (!DECL_IGNORED_P (decl))
37209 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
37210 xcoffout_declare_function (file, decl, buffer);
37211 else if (write_symbols == DWARF2_DEBUG)
37213 name = (*targetm.strip_name_encoding) (name);
37214 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
37217 return;
37221 /* Output assembly language to globalize a symbol from a DECL,
37222 possibly with visibility. */
37224 void
37225 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
37227 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
37228 fputs (GLOBAL_ASM_OP, stream);
37229 RS6000_OUTPUT_BASENAME (stream, name);
37230 #ifdef HAVE_GAS_HIDDEN
37231 fputs (rs6000_xcoff_visibility (decl), stream);
37232 #endif
37233 putc ('\n', stream);
37236 /* Output assembly language to define a symbol as COMMON from a DECL,
37237 possibly with visibility. */
37239 void
37240 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
37241 tree decl ATTRIBUTE_UNUSED,
37242 const char *name,
37243 unsigned HOST_WIDE_INT size,
37244 unsigned HOST_WIDE_INT align)
37246 unsigned HOST_WIDE_INT align2 = 2;
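/* AIX .comm takes a log2 alignment: default to 2**2 = 4 bytes, honor
   the declared alignment when it exceeds 32 bits, and promote objects
   larger than 4 bytes to 2**3 = 8.  */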
37248 if (align > 32)
37249 align2 = floor_log2 (align / BITS_PER_UNIT);
37250 else if (size > 4)
37251 align2 = 3;
37253 fputs (COMMON_ASM_OP, stream);
37254 RS6000_OUTPUT_BASENAME (stream, name);
37256 fprintf (stream,
37257 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
37258 size, align2);
37260 #ifdef HAVE_GAS_HIDDEN
37261 fputs (rs6000_xcoff_visibility (decl), stream);
37262 #endif
37263 putc ('\n', stream);
37266 /* This macro produces the initial definition of an object (variable) name.
37267 Because AIX assembler's .set command has unexpected semantics, we output
37268 all aliases as alternative labels in front of the definition. */
37270 void
37271 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
37273 struct declare_alias_data data = {file, false};
37274 RS6000_OUTPUT_BASENAME (file, name);
37275 fputs (":\n", file);
37276 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37277 &data, true);
37280 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
37282 void
37283 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
37285 fputs (integer_asm_op (size, FALSE), file);
37286 assemble_name (file, label);
37287 fputs ("-$", file);
37290 /* Output a symbol offset relative to the dbase for the current object.
37291 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37292 signed offsets.
37294 __gcc_unwind_dbase is embedded in all executables/libraries through
37295 libgcc/config/rs6000/crtdbase.S. */
37297 void
37298 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
37300 fputs (integer_asm_op (size, FALSE), file);
37301 assemble_name (file, label);
37302 fputs("-__gcc_unwind_dbase", file);
37305 #ifdef HAVE_AS_TLS
37306 static void
37307 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
37309 rtx symbol;
37310 int flags;
37311 const char *symname;
37313 default_encode_section_info (decl, rtl, first);
37315 /* Careful not to prod global register variables. */
37316 if (!MEM_P (rtl))
37317 return;
37318 symbol = XEXP (rtl, 0);
37319 if (GET_CODE (symbol) != SYMBOL_REF)
37320 return;
37322 flags = SYMBOL_REF_FLAGS (symbol);
37324 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
37325 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
37327 SYMBOL_REF_FLAGS (symbol) = flags;
37329 /* Append mapping class to extern decls. */
37330 symname = XSTR (symbol, 0);
37331 if (decl /* sync condition with assemble_external () */
37332 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
37333 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
37334 || TREE_CODE (decl) == FUNCTION_DECL)
37335 && symname[strlen (symname) - 1] != ']')
37337 char *newname = (char *) alloca (strlen (symname) + 5);
37338 strcpy (newname, symname);
37339 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
37340 ? "[DS]" : "[UA]"));
37341 XSTR (symbol, 0) = ggc_strdup (newname);
37344 #endif /* HAVE_AS_TLS */
37345 #endif /* TARGET_XCOFF */
37347 void
37348 rs6000_asm_weaken_decl (FILE *stream, tree decl,
37349 const char *name, const char *val)
37351 fputs ("\t.weak\t", stream);
37352 RS6000_OUTPUT_BASENAME (stream, name);
37353 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37354 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37356 if (TARGET_XCOFF)
37357 fputs ("[DS]", stream);
37358 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37359 if (TARGET_XCOFF)
37360 fputs (rs6000_xcoff_visibility (decl), stream);
37361 #endif
37362 fputs ("\n\t.weak\t.", stream);
37363 RS6000_OUTPUT_BASENAME (stream, name);
37365 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37366 if (TARGET_XCOFF)
37367 fputs (rs6000_xcoff_visibility (decl), stream);
37368 #endif
37369 fputc ('\n', stream);
37370 if (val)
37372 #ifdef ASM_OUTPUT_DEF
37373 ASM_OUTPUT_DEF (stream, name, val);
37374 #endif
37375 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37376 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37378 fputs ("\t.set\t.", stream);
37379 RS6000_OUTPUT_BASENAME (stream, name);
37380 fputs (",.", stream);
37381 RS6000_OUTPUT_BASENAME (stream, val);
37382 fputc ('\n', stream);
37388 /* Return true if INSN should not be copied. */
37390 static bool
37391 rs6000_cannot_copy_insn_p (rtx_insn *insn)
37393 return recog_memoized (insn) >= 0
37394 && get_attr_cannot_copy (insn);
37397 /* Compute a (partial) cost for rtx X. Return true if the complete
37398 cost has been computed, and false if subexpressions should be
37399 scanned. In either case, *TOTAL contains the cost result. */
37401 static bool
37402 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
37403 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
37405 int code = GET_CODE (x);
37407 switch (code)
37409 /* On the RS/6000, if it is valid in the insn, it is free. */
37410 case CONST_INT:
37411 if (((outer_code == SET
37412 || outer_code == PLUS
37413 || outer_code == MINUS)
37414 && (satisfies_constraint_I (x)
37415 || satisfies_constraint_L (x)))
37416 || (outer_code == AND
37417 && (satisfies_constraint_K (x)
37418 || (mode == SImode
37419 ? satisfies_constraint_L (x)
37420 : satisfies_constraint_J (x))))
37421 || ((outer_code == IOR || outer_code == XOR)
37422 && (satisfies_constraint_K (x)
37423 || (mode == SImode
37424 ? satisfies_constraint_L (x)
37425 : satisfies_constraint_J (x))))
37426 || outer_code == ASHIFT
37427 || outer_code == ASHIFTRT
37428 || outer_code == LSHIFTRT
37429 || outer_code == ROTATE
37430 || outer_code == ROTATERT
37431 || outer_code == ZERO_EXTRACT
37432 || (outer_code == MULT
37433 && satisfies_constraint_I (x))
37434 || ((outer_code == DIV || outer_code == UDIV
37435 || outer_code == MOD || outer_code == UMOD)
37436 && exact_log2 (INTVAL (x)) >= 0)
37437 || (outer_code == COMPARE
37438 && (satisfies_constraint_I (x)
37439 || satisfies_constraint_K (x)))
37440 || ((outer_code == EQ || outer_code == NE)
37441 && (satisfies_constraint_I (x)
37442 || satisfies_constraint_K (x)
37443 || (mode == SImode
37444 ? satisfies_constraint_L (x)
37445 : satisfies_constraint_J (x))))
37446 || (outer_code == GTU
37447 && satisfies_constraint_I (x))
37448 || (outer_code == LTU
37449 && satisfies_constraint_P (x)))
37451 *total = 0;
37452 return true;
37454 else if ((outer_code == PLUS
37455 && reg_or_add_cint_operand (x, VOIDmode))
37456 || (outer_code == MINUS
37457 && reg_or_sub_cint_operand (x, VOIDmode))
37458 || ((outer_code == SET
37459 || outer_code == IOR
37460 || outer_code == XOR)
37461 && (INTVAL (x)
37462 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
37464 *total = COSTS_N_INSNS (1);
37465 return true;
37467 /* FALLTHRU */
37469 case CONST_DOUBLE:
37470 case CONST_WIDE_INT:
37471 case CONST:
37472 case HIGH:
37473 case SYMBOL_REF:
37474 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37475 return true;
37477 case MEM:
37478 /* When optimizing for size, MEM should be slightly more expensive
37479 than generating address, e.g., (plus (reg) (const)).
37480 L1 cache latency is about two instructions. */
37481 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37482 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
37483 *total += COSTS_N_INSNS (100);
37484 return true;
37486 case LABEL_REF:
37487 *total = 0;
37488 return true;
37490 case PLUS:
37491 case MINUS:
37492 if (FLOAT_MODE_P (mode))
37493 *total = rs6000_cost->fp;
37494 else
37495 *total = COSTS_N_INSNS (1);
37496 return false;
37498 case MULT:
37499 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37500 && satisfies_constraint_I (XEXP (x, 1)))
37502 if (INTVAL (XEXP (x, 1)) >= -256
37503 && INTVAL (XEXP (x, 1)) <= 255)
37504 *total = rs6000_cost->mulsi_const9;
37505 else
37506 *total = rs6000_cost->mulsi_const;
37508 else if (mode == SFmode)
37509 *total = rs6000_cost->fp;
37510 else if (FLOAT_MODE_P (mode))
37511 *total = rs6000_cost->dmul;
37512 else if (mode == DImode)
37513 *total = rs6000_cost->muldi;
37514 else
37515 *total = rs6000_cost->mulsi;
37516 return false;
37518 case FMA:
37519 if (mode == SFmode)
37520 *total = rs6000_cost->fp;
37521 else
37522 *total = rs6000_cost->dmul;
37523 break;
37525 case DIV:
37526 case MOD:
37527 if (FLOAT_MODE_P (mode))
37529 *total = mode == DFmode ? rs6000_cost->ddiv
37530 : rs6000_cost->sdiv;
37531 return false;
37533 /* FALLTHRU */
37535 case UDIV:
37536 case UMOD:
37537 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37538 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
37540 if (code == DIV || code == MOD)
37541 /* Shift, addze */
37542 *total = COSTS_N_INSNS (2);
37543 else
37544 /* Shift */
37545 *total = COSTS_N_INSNS (1);
37547 else
37549 if (GET_MODE (XEXP (x, 1)) == DImode)
37550 *total = rs6000_cost->divdi;
37551 else
37552 *total = rs6000_cost->divsi;
37554 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37555 if (!TARGET_MODULO && (code == MOD || code == UMOD))
37556 *total += COSTS_N_INSNS (2);
37557 return false;
37559 case CTZ:
37560 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
37561 return false;
37563 case FFS:
37564 *total = COSTS_N_INSNS (4);
37565 return false;
37567 case POPCOUNT:
37568 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
37569 return false;
37571 case PARITY:
37572 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
37573 return false;
37575 case NOT:
37576 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
37577 *total = 0;
37578 else
37579 *total = COSTS_N_INSNS (1);
37580 return false;
37582 case AND:
37583 if (CONST_INT_P (XEXP (x, 1)))
37585 rtx left = XEXP (x, 0);
37586 rtx_code left_code = GET_CODE (left);
37588 /* rotate-and-mask: 1 insn. */
37589 if ((left_code == ROTATE
37590 || left_code == ASHIFT
37591 || left_code == LSHIFTRT)
37592 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
37594 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
37595 if (!CONST_INT_P (XEXP (left, 1)))
37596 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
37597 *total += COSTS_N_INSNS (1);
37598 return true;
37601 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37602 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
37603 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
37604 || (val & 0xffff) == val
37605 || (val & 0xffff0000) == val
37606 || ((val & 0xffff) == 0 && mode == SImode))
37608 *total = rtx_cost (left, mode, AND, 0, speed);
37609 *total += COSTS_N_INSNS (1);
37610 return true;
37613 /* 2 insns. */
37614 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
37616 *total = rtx_cost (left, mode, AND, 0, speed);
37617 *total += COSTS_N_INSNS (2);
37618 return true;
37622 *total = COSTS_N_INSNS (1);
37623 return false;
37625 case IOR:
37626 /* FIXME */
37627 *total = COSTS_N_INSNS (1);
37628 return true;
37630 case CLZ:
37631 case XOR:
37632 case ZERO_EXTRACT:
37633 *total = COSTS_N_INSNS (1);
37634 return false;
37636 case ASHIFT:
37637 /* EXTSWSLI is a combined sign-extend-and-shift instruction. Don't count
37638 the sign extend and the shift separately within the insn. */
37639 if (TARGET_EXTSWSLI && mode == DImode
37640 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
37641 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
37643 *total = 0;
37644 return false;
37646 /* fall through */
37648 case ASHIFTRT:
37649 case LSHIFTRT:
37650 case ROTATE:
37651 case ROTATERT:
37652 /* Handle mul_highpart. */
37653 if (outer_code == TRUNCATE
37654 && GET_CODE (XEXP (x, 0)) == MULT)
37656 if (mode == DImode)
37657 *total = rs6000_cost->muldi;
37658 else
37659 *total = rs6000_cost->mulsi;
37660 return true;
37662 else if (outer_code == AND)
37663 *total = 0;
37664 else
37665 *total = COSTS_N_INSNS (1);
37666 return false;
37668 case SIGN_EXTEND:
37669 case ZERO_EXTEND:
37670 if (GET_CODE (XEXP (x, 0)) == MEM)
37671 *total = 0;
37672 else
37673 *total = COSTS_N_INSNS (1);
37674 return false;
37676 case COMPARE:
37677 case NEG:
37678 case ABS:
37679 if (!FLOAT_MODE_P (mode))
37681 *total = COSTS_N_INSNS (1);
37682 return false;
37684 /* FALLTHRU */
37686 case FLOAT:
37687 case UNSIGNED_FLOAT:
37688 case FIX:
37689 case UNSIGNED_FIX:
37690 case FLOAT_TRUNCATE:
37691 *total = rs6000_cost->fp;
37692 return false;
37694 case FLOAT_EXTEND:
37695 if (mode == DFmode)
37696 *total = rs6000_cost->sfdf_convert;
37697 else
37698 *total = rs6000_cost->fp;
37699 return false;
37701 case UNSPEC:
37702 switch (XINT (x, 1))
37704 case UNSPEC_FRSP:
37705 *total = rs6000_cost->fp;
37706 return true;
37708 default:
37709 break;
37711 break;
37713 case CALL:
37714 case IF_THEN_ELSE:
37715 if (!speed)
37717 *total = COSTS_N_INSNS (1);
37718 return true;
37720 else if (FLOAT_MODE_P (mode)
37721 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
37723 *total = rs6000_cost->fp;
37724 return false;
37726 break;
37728 case NE:
37729 case EQ:
37730 case GTU:
37731 case LTU:
37732 /* Carry bit requires mode == Pmode.
37733 NEG or PLUS already counted so only add one. */
37734 if (mode == Pmode
37735 && (outer_code == NEG || outer_code == PLUS))
37737 *total = COSTS_N_INSNS (1);
37738 return true;
37740 if (outer_code == SET)
37742 if (XEXP (x, 1) == const0_rtx)
37744 if (TARGET_ISEL && !TARGET_MFCRF)
37745 *total = COSTS_N_INSNS (8);
37746 else
37747 *total = COSTS_N_INSNS (2);
37748 return true;
37750 else
37752 *total = COSTS_N_INSNS (3);
37753 return false;
37756 /* FALLTHRU */
37758 case GT:
37759 case LT:
37760 case UNORDERED:
37761 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
37763 if (TARGET_ISEL && !TARGET_MFCRF)
37764 *total = COSTS_N_INSNS (8);
37765 else
37766 *total = COSTS_N_INSNS (2);
37767 return true;
37769 /* CC COMPARE. */
37770 if (outer_code == COMPARE)
37772 *total = 0;
37773 return true;
37775 break;
37777 default:
37778 break;
37781 return false;
37784 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
37786 static bool
37787 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
37788 int opno, int *total, bool speed)
37790 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
37792 fprintf (stderr,
37793 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37794 "opno = %d, total = %d, speed = %s, x:\n",
37795 ret ? "complete" : "scan inner",
37796 GET_MODE_NAME (mode),
37797 GET_RTX_NAME (outer_code),
37798 opno,
37799 *total,
37800 speed ? "true" : "false");
37802 debug_rtx (x);
37804 return ret;
37807 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37809 static int
37810 rs6000_debug_address_cost (rtx x, machine_mode mode,
37811 addr_space_t as, bool speed)
37813 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
37815 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37816 ret, speed ? "true" : "false");
37817 debug_rtx (x);
37819 return ret;
37823 /* A C expression returning the cost of moving data from a register of class
37824 CLASS1 to one of CLASS2. */
37826 static int
37827 rs6000_register_move_cost (machine_mode mode,
37828 reg_class_t from, reg_class_t to)
37830 int ret;
37832 if (TARGET_DEBUG_COST)
37833 dbg_cost_ctrl++;
37835 /* Moves from/to GENERAL_REGS. */
37836 if (reg_classes_intersect_p (to, GENERAL_REGS)
37837 || reg_classes_intersect_p (from, GENERAL_REGS))
37839 reg_class_t rclass = from;
37841 if (! reg_classes_intersect_p (to, GENERAL_REGS))
37842 rclass = to;
37844 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
37845 ret = (rs6000_memory_move_cost (mode, rclass, false)
37846 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
37848 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37849 shift. */
37850 else if (rclass == CR_REGS)
37851 ret = 4;
37853 /* For those processors that have slow LR/CTR moves, make them more
37854 expensive than memory in order to bias spills to memory. */
37855 else if ((rs6000_cpu == PROCESSOR_POWER6
37856 || rs6000_cpu == PROCESSOR_POWER7
37857 || rs6000_cpu == PROCESSOR_POWER8
37858 || rs6000_cpu == PROCESSOR_POWER9)
37859 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
37860 ret = 6 * hard_regno_nregs (0, mode);
37862 else
37863 /* A move will cost one instruction per GPR moved. */
37864 ret = 2 * hard_regno_nregs (0, mode);
37867 /* If we have VSX, we can easily move between FPR and Altivec registers. */
37868 else if (VECTOR_MEM_VSX_P (mode)
37869 && reg_classes_intersect_p (to, VSX_REGS)
37870 && reg_classes_intersect_p (from, VSX_REGS))
37871 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
37873 /* Moving between two similar registers is just one instruction. */
37874 else if (reg_classes_intersect_p (to, from))
37875 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37877 /* Everything else has to go through GENERAL_REGS. */
37878 else
37879 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37880 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37882 if (TARGET_DEBUG_COST)
37884 if (dbg_cost_ctrl == 1)
37885 fprintf (stderr,
37886 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37887 ret, GET_MODE_NAME (mode), reg_class_names[from],
37888 reg_class_names[to]);
37889 dbg_cost_ctrl--;
37892 return ret;
37895 /* A C expression returning the cost of moving data of MODE from a register to
37896 or from memory. */
37898 static int
37899 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37900 bool in ATTRIBUTE_UNUSED)
37902 int ret;
37904 if (TARGET_DEBUG_COST)
37905 dbg_cost_ctrl++;
37907 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37908 ret = 4 * hard_regno_nregs (0, mode);
37909 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37910 || reg_classes_intersect_p (rclass, VSX_REGS)))
37911 ret = 4 * hard_regno_nregs (32, mode);
37912 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37913 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
37914 else
37915 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37917 if (TARGET_DEBUG_COST)
37919 if (dbg_cost_ctrl == 1)
37920 fprintf (stderr,
37921 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37922 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37923 dbg_cost_ctrl--;
37926 return ret;
37929 /* Returns a code for a target-specific builtin that implements
37930 reciprocal of the function, or NULL_TREE if not available. */
37932 static tree
37933 rs6000_builtin_reciprocal (tree fndecl)
37935 switch (DECL_FUNCTION_CODE (fndecl))
37937 case VSX_BUILTIN_XVSQRTDP:
37938 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37939 return NULL_TREE;
37941 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37943 case VSX_BUILTIN_XVSQRTSP:
37944 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37945 return NULL_TREE;
37947 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37949 default:
37950 return NULL_TREE;
37954 /* Load up a constant. If the mode is a vector mode, splat the value across
37955 all of the vector elements. */
37957 static rtx
37958 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37960 rtx reg;
37962 if (mode == SFmode || mode == DFmode)
37964 rtx d = const_double_from_real_value (dconst, mode);
37965 reg = force_reg (mode, d);
37967 else if (mode == V4SFmode)
37969 rtx d = const_double_from_real_value (dconst, SFmode);
37970 rtvec v = gen_rtvec (4, d, d, d, d);
37971 reg = gen_reg_rtx (mode);
37972 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37974 else if (mode == V2DFmode)
37976 rtx d = const_double_from_real_value (dconst, DFmode);
37977 rtvec v = gen_rtvec (2, d, d);
37978 reg = gen_reg_rtx (mode);
37979 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37981 else
37982 gcc_unreachable ();
37984 return reg;
37987 /* Generate an FMA instruction. */
37989 static void
37990 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37992 machine_mode mode = GET_MODE (target);
37993 rtx dst;
37995 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37996 gcc_assert (dst != NULL);
37998 if (dst != target)
37999 emit_move_insn (target, dst);
38002 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
38004 static void
38005 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
38007 machine_mode mode = GET_MODE (dst);
38008 rtx r;
38010 /* This is a tad more complicated, since the fnma_optab is for
38011 a different expression: fma(-m1, m2, a), which is the same
38012 thing except in the case of signed zeros.
38014 Fortunately we know that if FMA is supported that FNMSUB is
38015 also supported in the ISA. Just expand it directly. */
38017 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
38019 r = gen_rtx_NEG (mode, a);
38020 r = gen_rtx_FMA (mode, m1, m2, r);
38021 r = gen_rtx_NEG (mode, r);
38022 emit_insn (gen_rtx_SET (dst, r));
38025 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
38026 add a reg_note saying that this was a division. Support both scalar and
38027 vector divide. Assumes no trapping math and finite arguments. */
38029 void
38030 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
38032 machine_mode mode = GET_MODE (dst);
38033 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
38034 int i;
38036 /* Low precision estimates guarantee 5 bits of accuracy. High
38037 precision estimates guarantee 14 bits of accuracy. SFmode
38038 requires 23 bits of accuracy. DFmode requires 52 bits of
38039 accuracy. Each pass at least doubles the accuracy, leading
38040 to the following. */
38041 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38042 if (mode == DFmode || mode == V2DFmode)
38043 passes++;
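/* E.g., a 5-bit estimate with three passes doubles to 10, 20 and then
   40 bits, covering SFmode's 23; the extra DFmode/V2DFmode pass reaches
   80 bits for the 52 required.  With TARGET_RECIP_PRECISION the 14-bit
   estimate needs one pass (28 bits) for SFmode and two (56 bits) for
   DFmode.  */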
38045 enum insn_code code = optab_handler (smul_optab, mode);
38046 insn_gen_fn gen_mul = GEN_FCN (code);
38048 gcc_assert (code != CODE_FOR_nothing);
38050 one = rs6000_load_constant_and_splat (mode, dconst1);
38052 /* x0 = 1./d estimate */
38053 x0 = gen_reg_rtx (mode);
38054 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
38055 UNSPEC_FRES)));
38057 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
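/* Below this is computed as e_i = 1 - d * x_i and
   x_(i+1) = x_i + e_i * x_i, an equivalent form that maps directly
   onto the fnmsub/fmadd helpers.  */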
38058 if (passes > 1) {
38060 /* e0 = 1. - d * x0 */
38061 e0 = gen_reg_rtx (mode);
38062 rs6000_emit_nmsub (e0, d, x0, one);
38064 /* x1 = x0 + e0 * x0 */
38065 x1 = gen_reg_rtx (mode);
38066 rs6000_emit_madd (x1, e0, x0, x0);
38068 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
38069 ++i, xprev = xnext, eprev = enext) {
38071 /* enext = eprev * eprev */
38072 enext = gen_reg_rtx (mode);
38073 emit_insn (gen_mul (enext, eprev, eprev));
38075 /* xnext = xprev + enext * xprev */
38076 xnext = gen_reg_rtx (mode);
38077 rs6000_emit_madd (xnext, enext, xprev, xprev);
38080 } else
38081 xprev = x0;
38083 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
38085 /* u = n * xprev */
38086 u = gen_reg_rtx (mode);
38087 emit_insn (gen_mul (u, n, xprev));
38089 /* v = n - (d * u) */
38090 v = gen_reg_rtx (mode);
38091 rs6000_emit_nmsub (v, d, u, n);
38093 /* dst = (v * xprev) + u */
38094 rs6000_emit_madd (dst, v, xprev, u);
38096 if (note_p)
38097 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
38100 /* Goldschmidt's Algorithm for single/double-precision floating point
38101 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
38103 void
38104 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
38106 machine_mode mode = GET_MODE (src);
38107 rtx e = gen_reg_rtx (mode);
38108 rtx g = gen_reg_rtx (mode);
38109 rtx h = gen_reg_rtx (mode);
38111 /* Low precision estimates guarantee 5 bits of accuracy. High
38112 precision estimates guarantee 14 bits of accuracy. SFmode
38113 requires 23 bits of accuracy. DFmode requires 52 bits of
38114 accuracy. Each pass at least doubles the accuracy, leading
38115 to the following. */
38116 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38117 if (mode == DFmode || mode == V2DFmode)
38118 passes++;
38120 int i;
38121 rtx mhalf;
38122 enum insn_code code = optab_handler (smul_optab, mode);
38123 insn_gen_fn gen_mul = GEN_FCN (code);
38125 gcc_assert (code != CODE_FOR_nothing);
38127 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
38129 /* e = rsqrt estimate */
38130 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
38131 UNSPEC_RSQRT)));
38133 /* If src == 0.0, filter the infinite estimate to prevent a NaN for sqrt(0.0). */
38134 if (!recip)
38136 rtx zero = force_reg (mode, CONST0_RTX (mode));
38138 if (mode == SFmode)
38140 rtx target = emit_conditional_move (e, GT, src, zero, mode,
38141 e, zero, mode, 0);
38142 if (target != e)
38143 emit_move_insn (e, target);
38145 else
38147 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
38148 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
38152 /* g = sqrt estimate. */
38153 emit_insn (gen_mul (g, e, src));
38154 /* h = 1/(2*sqrt) estimate. */
38155 emit_insn (gen_mul (h, e, mhalf));
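/* Invariant: g approximates sqrt(src) and h approximates
   1/(2*sqrt(src)).  Each Goldschmidt step computes the residual
   t = 1/2 - g*h and refines g += g*t, h += h*t, roughly doubling the
   number of correct bits.  */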
38157 if (recip)
38159 if (passes == 1)
38161 rtx t = gen_reg_rtx (mode);
38162 rs6000_emit_nmsub (t, g, h, mhalf);
38163 /* Apply correction directly to 1/rsqrt estimate. */
38164 rs6000_emit_madd (dst, e, t, e);
38166 else
38168 for (i = 0; i < passes; i++)
38170 rtx t1 = gen_reg_rtx (mode);
38171 rtx g1 = gen_reg_rtx (mode);
38172 rtx h1 = gen_reg_rtx (mode);
38174 rs6000_emit_nmsub (t1, g, h, mhalf);
38175 rs6000_emit_madd (g1, g, t1, g);
38176 rs6000_emit_madd (h1, h, t1, h);
38178 g = g1;
38179 h = h1;
38181 /* Multiply by 2 for 1/rsqrt. */
38182 emit_insn (gen_add3_insn (dst, h, h));
38185 else
38187 rtx t = gen_reg_rtx (mode);
38188 rs6000_emit_nmsub (t, g, h, mhalf);
38189 rs6000_emit_madd (dst, g, t, g);
38192 return;
38195 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38196 (Power7) targets. DST is the target, and SRC is the argument operand. */
38198 void
38199 rs6000_emit_popcount (rtx dst, rtx src)
38201 machine_mode mode = GET_MODE (dst);
38202 rtx tmp1, tmp2;
38204 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38205 if (TARGET_POPCNTD)
38207 if (mode == SImode)
38208 emit_insn (gen_popcntdsi2 (dst, src));
38209 else
38210 emit_insn (gen_popcntddi2 (dst, src));
38211 return;
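/* Fallback: popcntb leaves a per-byte population count in each byte;
   multiplying by 0x01...01 sums those counts into the most significant
   byte, which the final shift extracts.  */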
38214 tmp1 = gen_reg_rtx (mode);
38216 if (mode == SImode)
38218 emit_insn (gen_popcntbsi2 (tmp1, src));
38219 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
38220 NULL_RTX, 0);
38221 tmp2 = force_reg (SImode, tmp2);
38222 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
38224 else
38226 emit_insn (gen_popcntbdi2 (tmp1, src));
38227 tmp2 = expand_mult (DImode, tmp1,
38228 GEN_INT ((HOST_WIDE_INT)
38229 0x01010101 << 32 | 0x01010101),
38230 NULL_RTX, 0);
38231 tmp2 = force_reg (DImode, tmp2);
38232 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
38237 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38238 target, and SRC is the argument operand. */
38240 void
38241 rs6000_emit_parity (rtx dst, rtx src)
38243 machine_mode mode = GET_MODE (dst);
38244 rtx tmp;
38246 tmp = gen_reg_rtx (mode);
38248 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38249 if (TARGET_CMPB)
38251 if (mode == SImode)
38253 emit_insn (gen_popcntbsi2 (tmp, src));
38254 emit_insn (gen_paritysi2_cmpb (dst, tmp));
38256 else
38258 emit_insn (gen_popcntbdi2 (tmp, src));
38259 emit_insn (gen_paritydi2_cmpb (dst, tmp));
38261 return;
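/* No parity instruction: parity is popcount & 1, and XOR-folding the
   high half of the popcntb result into the low half preserves the
   overall parity in bit 0, which beats the multiply on processors with
   slow multipliers.  */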
38264 if (mode == SImode)
38266 /* Is mult+shift >= shift+xor+shift+xor? */
38267 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
38269 rtx tmp1, tmp2, tmp3, tmp4;
38271 tmp1 = gen_reg_rtx (SImode);
38272 emit_insn (gen_popcntbsi2 (tmp1, src));
38274 tmp2 = gen_reg_rtx (SImode);
38275 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
38276 tmp3 = gen_reg_rtx (SImode);
38277 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
38279 tmp4 = gen_reg_rtx (SImode);
38280 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
38281 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
38283 else
38284 rs6000_emit_popcount (tmp, src);
38285 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
38287 else
38289 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38290 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
38292 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
38294 tmp1 = gen_reg_rtx (DImode);
38295 emit_insn (gen_popcntbdi2 (tmp1, src));
38297 tmp2 = gen_reg_rtx (DImode);
38298 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
38299 tmp3 = gen_reg_rtx (DImode);
38300 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
38302 tmp4 = gen_reg_rtx (DImode);
38303 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
38304 tmp5 = gen_reg_rtx (DImode);
38305 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
38307 tmp6 = gen_reg_rtx (DImode);
38308 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
38309 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
38311 else
38312 rs6000_emit_popcount (tmp, src);
38313 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
38317 /* Expand an Altivec constant permutation for little endian mode.
38318 OP0 and OP1 are the input vectors and TARGET is the output vector.
38319 SEL specifies the constant permutation vector.
38321 There are two issues: First, the two input operands must be
38322 swapped so that together they form a double-wide array in LE
38323 order. Second, the vperm instruction has surprising behavior
38324 in LE mode: it interprets the elements of the source vectors
38325 in BE mode ("left to right") and interprets the elements of
38326 the destination vector in LE mode ("right to left"). To
38327 correct for this, we must subtract each element of the permute
38328 control vector from 31.
38330 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38331 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38332 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38333 serve as the permute control vector. Then, in BE mode,
38335 vperm 9,10,11,12
38337 places the desired result in vr9. However, in LE mode the
38338 vector contents will be
38340 vr10 = 00000003 00000002 00000001 00000000
38341 vr11 = 00000007 00000006 00000005 00000004
38343 The result of the vperm using the same permute control vector is
38345 vr9 = 05000000 07000000 01000000 03000000
38347 That is, the leftmost 4 bytes of vr10 are interpreted as the
38348 source for the rightmost 4 bytes of vr9, and so on.
38350 If we change the permute control vector to
38352 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38354 and issue
38356 vperm 9,11,10,12
38358 we get the desired
38360 vr9 = 00000006 00000004 00000002 00000000. */
38362 static void
38363 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
38364 const vec_perm_indices &sel)
38366 unsigned int i;
38367 rtx perm[16];
38368 rtx constv, unspec;
38370 /* Unpack and adjust the constant selector. */
38371 for (i = 0; i < 16; ++i)
38373 unsigned int elt = 31 - (sel[i] & 31);
38374 perm[i] = GEN_INT (elt);
38377 /* Expand to a permute, swapping the inputs and using the
38378 adjusted selector. */
38379 if (!REG_P (op0))
38380 op0 = force_reg (V16QImode, op0);
38381 if (!REG_P (op1))
38382 op1 = force_reg (V16QImode, op1);
38384 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
38385 constv = force_reg (V16QImode, constv);
38386 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
38387 UNSPEC_VPERM);
38388 if (!REG_P (target))
38390 rtx tmp = gen_reg_rtx (V16QImode);
38391 emit_move_insn (tmp, unspec);
38392 unspec = tmp;
38395 emit_move_insn (target, unspec);
38398 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38399 permute control vector. But here it's not a constant, so we must
38400 generate a vector NAND or NOR to do the adjustment. */
38402 void
38403 altivec_expand_vec_perm_le (rtx operands[4])
38405 rtx notx, iorx, unspec;
38406 rtx target = operands[0];
38407 rtx op0 = operands[1];
38408 rtx op1 = operands[2];
38409 rtx sel = operands[3];
38410 rtx tmp = target;
38411 rtx norreg = gen_reg_rtx (V16QImode);
38412 machine_mode mode = GET_MODE (target);
38414 /* Get everything in regs so the pattern matches. */
38415 if (!REG_P (op0))
38416 op0 = force_reg (mode, op0);
38417 if (!REG_P (op1))
38418 op1 = force_reg (mode, op1);
38419 if (!REG_P (sel))
38420 sel = force_reg (V16QImode, sel);
38421 if (!REG_P (target))
38422 tmp = gen_reg_rtx (mode);
38424 if (TARGET_P9_VECTOR)
38426 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
38427 UNSPEC_VPERMR);
38429 else
38431 /* Invert the selector with a VNAND if available, else a VNOR.
38432 The VNAND is preferred for future fusion opportunities. */
38433 notx = gen_rtx_NOT (V16QImode, sel);
38434 iorx = (TARGET_P8_VECTOR
38435 ? gen_rtx_IOR (V16QImode, notx, notx)
38436 : gen_rtx_AND (V16QImode, notx, notx));
38437 emit_insn (gen_rtx_SET (norreg, iorx));
38439 /* Permute with operands reversed and adjusted selector. */
38440 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
38441 UNSPEC_VPERM);
38444 /* Copy into target, possibly by way of a register. */
38445 if (!REG_P (target))
38447 emit_move_insn (tmp, unspec);
38448 unspec = tmp;
38451 emit_move_insn (target, unspec);
38454 /* Expand an Altivec constant permutation. Return true if we match
38455 an efficient implementation; false to fall back to VPERM.
38457 OP0 and OP1 are the input vectors and TARGET is the output vector.
38458 SEL specifies the constant permutation vector. */
38460 static bool
38461 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
38462 const vec_perm_indices &sel)
38464 struct altivec_perm_insn {
38465 HOST_WIDE_INT mask;
38466 enum insn_code impl;
38467 unsigned char perm[16];
38469 static const struct altivec_perm_insn patterns[] = {
38470 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
38471 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38472 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
38473 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38474 { OPTION_MASK_ALTIVEC,
38475 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
38476 : CODE_FOR_altivec_vmrglb_direct),
38477 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38478 { OPTION_MASK_ALTIVEC,
38479 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
38480 : CODE_FOR_altivec_vmrglh_direct),
38481 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38482 { OPTION_MASK_ALTIVEC,
38483 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
38484 : CODE_FOR_altivec_vmrglw_direct),
38485 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38486 { OPTION_MASK_ALTIVEC,
38487 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
38488 : CODE_FOR_altivec_vmrghb_direct),
38489 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38490 { OPTION_MASK_ALTIVEC,
38491 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
38492 : CODE_FOR_altivec_vmrghh_direct),
38493 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38494 { OPTION_MASK_ALTIVEC,
38495 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
38496 : CODE_FOR_altivec_vmrghw_direct),
38497 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38498 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
38499 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38500 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
38501 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38502 };
38504 unsigned int i, j, elt, which;
38505 unsigned char perm[16];
38506 rtx x;
38507 bool one_vec;
38509 /* Unpack the constant selector. */
38510 for (i = which = 0; i < 16; ++i)
38512 elt = sel[i] & 31;
38513 which |= (elt < 16 ? 1 : 2);
38514 perm[i] = elt;
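/* WHICH is now a two-bit mask: bit 0 is set if any index selects
from OP0 (index < 16) and bit 1 if any selects from OP1.  */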
38517 /* Simplify the constant selector based on operands. */
38518 switch (which)
38520 default:
38521 gcc_unreachable ();
38523 case 3:
38524 one_vec = false;
38525 if (!rtx_equal_p (op0, op1))
38526 break;
38527 /* FALLTHRU */
38529 case 2:
38530 for (i = 0; i < 16; ++i)
38531 perm[i] &= 15;
38532 op0 = op1;
38533 one_vec = true;
38534 break;
38536 case 1:
38537 op1 = op0;
38538 one_vec = true;
38539 break;
38542 /* Look for splat patterns. */
38543 if (one_vec)
38545 elt = perm[0];
38547 for (i = 0; i < 16; ++i)
38548 if (perm[i] != elt)
38549 break;
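/* All 16 bytes select the same element, so this is a vspltb splat.
The hardware numbers elements in big-endian order, so flip ELT for
little-endian.  */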
38550 if (i == 16)
38552 if (!BYTES_BIG_ENDIAN)
38553 elt = 15 - elt;
38554 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
38555 return true;
38558 if (elt % 2 == 0)
38560 for (i = 0; i < 16; i += 2)
38561 if (perm[i] != elt || perm[i + 1] != elt + 1)
38562 break;
38563 if (i == 16)
38565 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38566 x = gen_reg_rtx (V8HImode);
38567 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38568 GEN_INT (field)));
38569 emit_move_insn (target, gen_lowpart (V16QImode, x));
38570 return true;
38574 if (elt % 4 == 0)
38576 for (i = 0; i < 16; i += 4)
38577 if (perm[i] != elt
38578 || perm[i + 1] != elt + 1
38579 || perm[i + 2] != elt + 2
38580 || perm[i + 3] != elt + 3)
38581 break;
38582 if (i == 16)
38584 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38585 x = gen_reg_rtx (V4SImode);
38586 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38587 GEN_INT (field)));
38588 emit_move_insn (target, gen_lowpart (V16QImode, x));
38589 return true;
38594 /* Look for merge and pack patterns. */
38595 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38597 bool swapped;
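/* The selector can match a pattern either directly or with the two
input vectors interchanged, i.e. with every index offset by 16
(mod 32).  Detect which case applies from the first element.  */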
38599 if ((patterns[j].mask & rs6000_isa_flags) == 0)
38600 continue;
38602 elt = patterns[j].perm[0];
38603 if (perm[0] == elt)
38604 swapped = false;
38605 else if (perm[0] == elt + 16)
38606 swapped = true;
38607 else
38608 continue;
38609 for (i = 1; i < 16; ++i)
38611 elt = patterns[j].perm[i];
38612 if (swapped)
38613 elt = (elt >= 16 ? elt - 16 : elt + 16);
38614 else if (one_vec && elt >= 16)
38615 elt -= 16;
38616 if (perm[i] != elt)
38617 break;
38619 if (i == 16)
38621 enum insn_code icode = patterns[j].impl;
38622 machine_mode omode = insn_data[icode].operand[0].mode;
38623 machine_mode imode = insn_data[icode].operand[1].mode;
38625 /* For little-endian, don't use vpkuwum and vpkuhum if the
38626 underlying vector type is not V4SI and V8HI, respectively.
38627 For example, using vpkuwum with a V8HI picks up the even
38628 halfwords (BE numbering) when the even halfwords (LE
38629 numbering) are what we need. */
38630 if (!BYTES_BIG_ENDIAN
38631 && icode == CODE_FOR_altivec_vpkuwum_direct
38632 && ((GET_CODE (op0) == REG
38633 && GET_MODE (op0) != V4SImode)
38634 || (GET_CODE (op0) == SUBREG
38635 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38636 continue;
38637 if (!BYTES_BIG_ENDIAN
38638 && icode == CODE_FOR_altivec_vpkuhum_direct
38639 && ((GET_CODE (op0) == REG
38640 && GET_MODE (op0) != V8HImode)
38641 || (GET_CODE (op0) == SUBREG
38642 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38643 continue;
38645 /* For little-endian, the two input operands must be swapped
38646 (or swapped back) to ensure proper right-to-left numbering
38647 from 0 to 2N-1. */
38648 if (swapped ^ !BYTES_BIG_ENDIAN)
38649 std::swap (op0, op1);
38650 if (imode != V16QImode)
38652 op0 = gen_lowpart (imode, op0);
38653 op1 = gen_lowpart (imode, op1);
38655 if (omode == V16QImode)
38656 x = target;
38657 else
38658 x = gen_reg_rtx (omode);
38659 emit_insn (GEN_FCN (icode) (x, op0, op1));
38660 if (omode != V16QImode)
38661 emit_move_insn (target, gen_lowpart (V16QImode, x));
38662 return true;
38666 if (!BYTES_BIG_ENDIAN)
38668 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
38669 return true;
38672 return false;
38675 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38676 Return true if we match an efficient implementation.  PERM0 and PERM1 are two-bit element selectors: bit 1 chooses the input vector (OP0 or OP1), bit 0 the element within it. */
38678 static bool
38679 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38680 unsigned char perm0, unsigned char perm1)
38682 rtx x;
38684 /* If both selectors come from the same operand, fold to single op. */
38685 if ((perm0 & 2) == (perm1 & 2))
38687 if (perm0 & 2)
38688 op0 = op1;
38689 else
38690 op1 = op0;
38692 /* If both operands are equal, fold to simpler permutation. */
38693 if (rtx_equal_p (op0, op1))
38695 perm0 = perm0 & 1;
38696 perm1 = (perm1 & 1) + 2;
38698 /* If the first selector comes from the second operand, swap. */
38699 else if (perm0 & 2)
38701 if (perm1 & 2)
38702 return false;
38703 perm0 -= 2;
38704 perm1 += 2;
38705 std::swap (op0, op1);
38707 /* If the second selector does not come from the second operand, fail. */
38708 else if ((perm1 & 2) == 0)
38709 return false;
38711 /* Success! */
38712 if (target != NULL)
38714 machine_mode vmode, dmode;
38715 rtvec v;
38717 vmode = GET_MODE (target);
38718 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38719 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
38720 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38721 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38722 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38723 emit_insn (gen_rtx_SET (target, x));
38725 return true;
38728 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
38730 static bool
38731 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
38732 rtx op1, const vec_perm_indices &sel)
38734 bool testing_p = !target;
38736 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38737 if (TARGET_ALTIVEC && testing_p)
38738 return true;
38740 /* Check for ps_merge*, evmerge* or xxperm* insns. */
38741 if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
38742 || (vmode == V2SImode && TARGET_SPE)
38743 || ((vmode == V2DFmode || vmode == V2DImode)
38744 && VECTOR_MEM_VSX_P (vmode)))
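/* In testing mode no operands are supplied; fabricate pseudo
registers so the helper below has something to inspect.  With a
null target it only validates the selector and emits nothing.  */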
38746 if (testing_p)
38748 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38749 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38751 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
38752 return true;
38755 if (TARGET_ALTIVEC)
38757 /* Force the target-independent code to lower to V16QImode. */
38758 if (vmode != V16QImode)
38759 return false;
38760 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
38761 return true;
38764 return false;
38767 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
38768 OP0 and OP1 are the input vectors and TARGET is the output vector.
38769 PERM specifies the constant permutation vector. */
38771 static void
38772 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38773 machine_mode vmode, const vec_perm_builder &perm)
38775 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
38776 if (x != target)
38777 emit_move_insn (target, x);
38780 /* Expand an extract even operation. */
38782 void
38783 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38785 machine_mode vmode = GET_MODE (target);
38786 unsigned i, nelt = GET_MODE_NUNITS (vmode);
38787 vec_perm_builder perm (nelt, nelt, 1);
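/* Build the selector { 0, 2, 4, ... }; e.g. for V4SI this is
{ 0, 2, 4, 6 }, picking the even elements of the double-width
concatenation of OP0 and OP1.  */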
38789 for (i = 0; i < nelt; i++)
38790 perm.quick_push (i * 2);
38792 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
38795 /* Expand a vector interleave operation. */
38797 void
38798 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38800 machine_mode vmode = GET_MODE (target);
38801 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38802 vec_perm_builder perm (nelt, nelt, 1);
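/* HIGHP selects the first NELT/2 elements in array order (the
register-high half on a big-endian target).  */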
38804 high = (highp ? 0 : nelt / 2);
38805 for (i = 0; i < nelt / 2; i++)
38807 perm.quick_push (i + high);
38808 perm.quick_push (i + nelt + high);
38811 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
38814 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT; e.g. SCALE == 3 multiplies each element by 8.0. */
38815 void
38816 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38818 HOST_WIDE_INT hwi_scale (scale);
38819 REAL_VALUE_TYPE r_pow;
38820 rtvec v = rtvec_alloc (2);
38821 rtx elt;
38822 rtx scale_vec = gen_reg_rtx (V2DFmode);
38823 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38824 elt = const_double_from_real_value (r_pow, DFmode);
38825 RTVEC_ELT (v, 0) = elt;
38826 RTVEC_ELT (v, 1) = elt;
38827 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38828 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
38831 /* Return an RTX representing where to find the function value of a
38832 function returning MODE. */
38833 static rtx
38834 rs6000_complex_function_value (machine_mode mode)
38836 unsigned int regno;
38837 rtx r1, r2;
38838 machine_mode inner = GET_MODE_INNER (mode);
38839 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38841 if (TARGET_FLOAT128_TYPE
38842 && (mode == KCmode
38843 || (mode == TCmode && TARGET_IEEEQUAD)))
38844 regno = ALTIVEC_ARG_RETURN;
38846 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38847 regno = FP_ARG_RETURN;
38849 else
38851 regno = GP_ARG_RETURN;
38853 /* 32-bit is OK since it'll go in r3/r4. */
38854 if (TARGET_32BIT && inner_bytes >= 4)
38855 return gen_rtx_REG (mode, regno);
38858 if (inner_bytes >= 8)
38859 return gen_rtx_REG (mode, regno);
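/* Otherwise split the complex value across two consecutive
registers: the real part at offset 0 in REGNO, the imaginary part
at offset INNER_BYTES in REGNO + 1.  */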
38861 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38862 const0_rtx);
38863 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38864 GEN_INT (inner_bytes));
38865 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38868 /* Return an rtx describing a return value of MODE as a PARALLEL
38869 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38870 stride REG_STRIDE. */
38872 static rtx
38873 rs6000_parallel_return (machine_mode mode,
38874 int n_elts, machine_mode elt_mode,
38875 unsigned int regno, unsigned int reg_stride)
38877 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38879 int i;
38880 for (i = 0; i < n_elts; i++)
38882 rtx r = gen_rtx_REG (elt_mode, regno);
38883 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38884 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38885 regno += reg_stride;
38888 return par;
38891 /* Target hook for TARGET_FUNCTION_VALUE.
38893 On the SPE, both FPs and vectors are returned in r3.
38895 On RS/6000 an integer value is in r3 and a floating-point value is in
38896 fp1, unless -msoft-float. */
38898 static rtx
38899 rs6000_function_value (const_tree valtype,
38900 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38901 bool outgoing ATTRIBUTE_UNUSED)
38903 machine_mode mode;
38904 unsigned int regno;
38905 machine_mode elt_mode;
38906 int n_elts;
38908 /* Special handling for structs in darwin64. */
38909 if (TARGET_MACHO
38910 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38912 CUMULATIVE_ARGS valcum;
38913 rtx valret;
38915 valcum.words = 0;
38916 valcum.fregno = FP_ARG_MIN_REG;
38917 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38918 /* Do a trial code generation as if this were going to be passed as
38919 an argument; if any part goes in memory, we return NULL. */
38920 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38921 if (valret)
38922 return valret;
38923 /* Otherwise fall through to standard ABI rules. */
38926 mode = TYPE_MODE (valtype);
38928 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
38929 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38931 int first_reg, n_regs;
38933 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38935 /* _Decimal128 must use even/odd register pairs. */
38936 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38937 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38939 else
38941 first_reg = ALTIVEC_ARG_RETURN;
38942 n_regs = 1;
38945 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38948 /* Some return value types need to be split in -mpowerpc64, 32-bit ABI. */
38949 if (TARGET_32BIT && TARGET_POWERPC64)
38950 switch (mode)
38952 default:
38953 break;
38954 case E_DImode:
38955 case E_SCmode:
38956 case E_DCmode:
38957 case E_TCmode:
38958 int count = GET_MODE_SIZE (mode) / 4;
38959 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38962 if ((INTEGRAL_TYPE_P (valtype)
38963 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38964 || POINTER_TYPE_P (valtype))
38965 mode = TARGET_32BIT ? SImode : DImode;
38967 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38968 /* _Decimal128 must use an even/odd register pair. */
38969 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38970 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38971 && !FLOAT128_VECTOR_P (mode)
38972 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38973 regno = FP_ARG_RETURN;
38974 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38975 && targetm.calls.split_complex_arg)
38976 return rs6000_complex_function_value (mode);
38977 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38978 return register is used in both cases, and we won't see V2DImode/V2DFmode
38979 for pure altivec, combine the two cases. */
38980 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38981 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38982 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38983 regno = ALTIVEC_ARG_RETURN;
38984 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38985 && (mode == DFmode || mode == DCmode
38986 || FLOAT128_IBM_P (mode) || mode == TCmode))
38987 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38988 else
38989 regno = GP_ARG_RETURN;
38991 return gen_rtx_REG (mode, regno);
38994 /* Define how to find the value returned by a library function
38995 assuming the value has mode MODE. */
38996 rtx
38997 rs6000_libcall_value (machine_mode mode)
38999 unsigned int regno;
39001 /* A long long return value needs to be split in -mpowerpc64, 32-bit ABI. */
39002 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
39003 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
39005 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
39006 /* _Decimal128 must use an even/odd register pair. */
39007 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
39008 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
39009 && TARGET_HARD_FLOAT && TARGET_FPRS
39010 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
39011 regno = FP_ARG_RETURN;
39012 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
39013 return register is used in both cases, and we won't see V2DImode/V2DFmode
39014 for pure altivec, combine the two cases. */
39015 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
39016 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
39017 regno = ALTIVEC_ARG_RETURN;
39018 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
39019 return rs6000_complex_function_value (mode);
39020 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
39021 && (mode == DFmode || mode == DCmode
39022 || FLOAT128_IBM_P (mode) || mode == TCmode))
39023 return spe_build_register_parallel (mode, GP_ARG_RETURN);
39024 else
39025 regno = GP_ARG_RETURN;
39027 return gen_rtx_REG (mode, regno);
39031 /* Return true if we use LRA instead of the reload pass. */
39032 static bool
39033 rs6000_lra_p (void)
39035 return TARGET_LRA;
39038 /* Compute register pressure classes. We implement the target hook to avoid
39039 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
39040 lead to incorrect estimates of the number of available registers and therefore
39041 increased register pressure/spill. */
39042 static int
39043 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
39045 int n;
39047 n = 0;
39048 pressure_classes[n++] = GENERAL_REGS;
39049 if (TARGET_VSX)
39050 pressure_classes[n++] = VSX_REGS;
39051 else
39053 if (TARGET_ALTIVEC)
39054 pressure_classes[n++] = ALTIVEC_REGS;
39055 if (TARGET_HARD_FLOAT && TARGET_FPRS)
39056 pressure_classes[n++] = FLOAT_REGS;
39058 pressure_classes[n++] = CR_REGS;
39059 pressure_classes[n++] = SPECIAL_REGS;
39061 return n;
39064 /* Given FROM and TO register numbers, say whether this elimination is allowed.
39065 Frame pointer elimination is automatically handled.
39067 For the RS/6000, if frame pointer elimination is being done, we would like
39068 to convert ap into fp, not sp.
39070 We need r30 if -mminimal-toc was specified, and there are constant pool
39071 references. */
39073 static bool
39074 rs6000_can_eliminate (const int from, const int to)
39076 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
39077 ? ! frame_pointer_needed
39078 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
39079 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
39080 || constant_pool_empty_p ()
39081 : true);
39084 /* Define the offset between two registers, FROM to be eliminated and its
39085 replacement TO, at the start of a routine. */
39086 HOST_WIDE_INT
39087 rs6000_initial_elimination_offset (int from, int to)
39089 rs6000_stack_t *info = rs6000_stack_info ();
39090 HOST_WIDE_INT offset;
39092 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39093 offset = info->push_p ? 0 : -info->total_size;
39094 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39096 offset = info->push_p ? 0 : -info->total_size;
39097 if (FRAME_GROWS_DOWNWARD)
39098 offset += info->fixed_size + info->vars_size + info->parm_size;
39100 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39101 offset = FRAME_GROWS_DOWNWARD
39102 ? info->fixed_size + info->vars_size + info->parm_size
39103 : 0;
39104 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39105 offset = info->total_size;
39106 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39107 offset = info->push_p ? info->total_size : 0;
39108 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
39109 offset = 0;
39110 else
39111 gcc_unreachable ();
39113 return offset;
39116 static rtx
39117 rs6000_dwarf_register_span (rtx reg)
39119 rtx parts[8];
39120 int i, words;
39121 unsigned regno = REGNO (reg);
39122 machine_mode mode = GET_MODE (reg);
39124 if (TARGET_SPE
39125 && regno < 32
39126 && (SPE_VECTOR_MODE (GET_MODE (reg))
39127 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
39128 && mode != SFmode && mode != SDmode && mode != SCmode)))
39129 ;  /* Matched: build the register span below.  */
39130 else
39131 return NULL_RTX;
39133 regno = REGNO (reg);
39135 /* The duality of the SPE register size wreaks all kinds of havoc.
39136 This is a way of distinguishing r0 in 32-bits from r0 in
39137 64-bits. */
39138 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
39139 gcc_assert (words <= 4);
39140 for (i = 0; i < words; i++, regno++)
39142 if (BYTES_BIG_ENDIAN)
39144 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39145 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
39147 else
39149 parts[2 * i] = gen_rtx_REG (SImode, regno);
39150 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39154 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
39157 /* Fill in sizes for SPE register high parts in table used by unwinder. */
39159 static void
39160 rs6000_init_dwarf_reg_sizes_extra (tree address)
39162 if (TARGET_SPE)
39164 int i;
39165 machine_mode mode = TYPE_MODE (char_type_node);
39166 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39167 rtx mem = gen_rtx_MEM (BLKmode, addr);
39168 rtx value = gen_int_mode (4, mode);
39170 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
39172 int column = DWARF_REG_TO_UNWIND_COLUMN
39173 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39174 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39176 emit_move_insn (adjust_address (mem, mode, offset), value);
39180 if (TARGET_MACHO && ! TARGET_ALTIVEC)
39182 int i;
39183 machine_mode mode = TYPE_MODE (char_type_node);
39184 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39185 rtx mem = gen_rtx_MEM (BLKmode, addr);
39186 rtx value = gen_int_mode (16, mode);
39188 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39189 The unwinder still needs to know the size of Altivec registers. */
39191 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
39193 int column = DWARF_REG_TO_UNWIND_COLUMN
39194 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39195 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39197 emit_move_insn (adjust_address (mem, mode, offset), value);
39202 /* Map internal gcc register numbers to debug format register numbers.
39203 FORMAT specifies the type of debug register number to use:
39204 0 -- debug information, except for frame-related sections
39205 1 -- DWARF .debug_frame section
39206 2 -- DWARF .eh_frame section */
39208 unsigned int
39209 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
39211 /* We never use the GCC internal number for SPE high registers.
39212 Those are mapped to the 1200..1231 range for all debug formats. */
39213 if (SPE_HIGH_REGNO_P (regno))
39214 return regno - FIRST_SPE_HIGH_REGNO + 1200;
39216 /* Except for the above, we use the internal number for non-DWARF
39217 debug information, and also for .eh_frame. */
39218 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
39219 return regno;
39221 /* On some platforms, we use the standard DWARF register
39222 numbering for .debug_info and .debug_frame. */
39223 #ifdef RS6000_USE_DWARF_NUMBERING
39224 if (regno <= 63)
39225 return regno;
39226 if (regno == LR_REGNO)
39227 return 108;
39228 if (regno == CTR_REGNO)
39229 return 109;
39230 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39231 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39232 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39233 to the DWARF reg for CR. */
39234 if (format == 1 && regno == CR2_REGNO)
39235 return 64;
39236 if (CR_REGNO_P (regno))
39237 return regno - CR0_REGNO + 86;
39238 if (regno == CA_REGNO)
39239 return 101; /* XER */
39240 if (ALTIVEC_REGNO_P (regno))
39241 return regno - FIRST_ALTIVEC_REGNO + 1124;
39242 if (regno == VRSAVE_REGNO)
39243 return 356;
39244 if (regno == VSCR_REGNO)
39245 return 67;
39246 if (regno == SPE_ACC_REGNO)
39247 return 99;
39248 if (regno == SPEFSCR_REGNO)
39249 return 612;
39250 #endif
39251 return regno;
39254 /* Target hook for eh_return_filter_mode. */
39255 static scalar_int_mode
39256 rs6000_eh_return_filter_mode (void)
39258 return TARGET_32BIT ? SImode : word_mode;
39261 /* Target hook for scalar_mode_supported_p. */
39262 static bool
39263 rs6000_scalar_mode_supported_p (scalar_mode mode)
39265 /* -m32 does not support TImode. This is the default, from
39266 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39267 same ABI as for -m32. But default_scalar_mode_supported_p allows
39268 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39269 for -mpowerpc64. */
39270 if (TARGET_32BIT && mode == TImode)
39271 return false;
39273 if (DECIMAL_FLOAT_MODE_P (mode))
39274 return default_decimal_float_supported_p ();
39275 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
39276 return true;
39277 else
39278 return default_scalar_mode_supported_p (mode);
39281 /* Target hook for vector_mode_supported_p. */
39282 static bool
39283 rs6000_vector_mode_supported_p (machine_mode mode)
39286 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
39287 return true;
39289 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
39290 return true;
39292 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39293 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39294 double-double. */
39295 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
39296 return true;
39298 else
39299 return false;
39302 /* Target hook for floatn_mode. */
39303 static opt_scalar_float_mode
39304 rs6000_floatn_mode (int n, bool extended)
39306 if (extended)
39308 switch (n)
39310 case 32:
39311 return DFmode;
39313 case 64:
39314 if (TARGET_FLOAT128_KEYWORD)
39315 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39316 else
39317 return opt_scalar_float_mode ();
39319 case 128:
39320 return opt_scalar_float_mode ();
39322 default:
39323 /* Those are the only valid _FloatNx types. */
39324 gcc_unreachable ();
39327 else
39329 switch (n)
39331 case 32:
39332 return SFmode;
39334 case 64:
39335 return DFmode;
39337 case 128:
39338 if (TARGET_FLOAT128_KEYWORD)
39339 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39340 else
39341 return opt_scalar_float_mode ();
39343 default:
39344 return opt_scalar_float_mode ();
39350 /* Target hook for c_mode_for_suffix. */
39351 static machine_mode
39352 rs6000_c_mode_for_suffix (char suffix)
39354 if (TARGET_FLOAT128_TYPE)
39356 if (suffix == 'q' || suffix == 'Q')
39357 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39359 /* At the moment, we are not defining a suffix for IBM extended double.
39360 If/when the default for -mabi=ieeelongdouble is changed, and we want
39361 to support __ibm128 constants in legacy library code, we may need to
39362 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
39363 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
39364 __float80 constants. */
39367 return VOIDmode;
39370 /* Target hook for invalid_arg_for_unprototyped_fn. */
39371 static const char *
39372 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
39374 return (!rs6000_darwin64_abi
39375 && typelist == 0
39376 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
39377 && (funcdecl == NULL_TREE
39378 || (TREE_CODE (funcdecl) == FUNCTION_DECL
39379 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
39380 ? N_("AltiVec argument passed to unprototyped function")
39381 : NULL;
39384 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39385 setup by using __stack_chk_fail_local hidden function instead of
39386 calling __stack_chk_fail directly. Otherwise it is better to call
39387 __stack_chk_fail directly. */
39389 static tree ATTRIBUTE_UNUSED
39390 rs6000_stack_protect_fail (void)
39392 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
39393 ? default_hidden_stack_protect_fail ()
39394 : default_external_stack_protect_fail ();
39397 void
39398 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
39399 int num_operands ATTRIBUTE_UNUSED)
39401 if (rs6000_warn_cell_microcode)
39403 const char *temp;
39404 int insn_code_number = recog_memoized (insn);
39405 location_t location = INSN_LOCATION (insn);
39407 /* Punt on insns we cannot recognize. */
39408 if (insn_code_number < 0)
39409 return;
39411 /* get_insn_template can modify recog_data, so save and restore it. */
39412 struct recog_data_d recog_data_save = recog_data;
39413 for (int i = 0; i < recog_data.n_operands; i++)
39414 recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
39415 temp = get_insn_template (insn_code_number, insn);
39416 recog_data = recog_data_save;
39418 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
39419 warning_at (location, OPT_mwarn_cell_microcode,
39420 "emitting microcode insn %s\t[%s] #%d",
39421 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39422 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
39423 warning_at (location, OPT_mwarn_cell_microcode,
39424 "emitting conditional microcode insn %s\t[%s] #%d",
39425 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39429 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
39431 #if TARGET_ELF
39432 static unsigned HOST_WIDE_INT
39433 rs6000_asan_shadow_offset (void)
39435 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
39437 #endif
39439 /* Mask options that we want to support inside of attribute((target)) and
39440 #pragma GCC target operations. Note, we do not include things like
39441 64/32-bit, endianness, hard/soft floating point, etc. that would have
39442 different calling sequences. */
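/* For example (illustrative only; the accepted names are the table
entries below):

__attribute__((__target__("cpu=power9,vsx")))
int foo (void);

#pragma GCC target ("altivec,no-string")

Masks marked valid_target == false are recognized but rejected with
a "not allowed" error.  */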
39444 struct rs6000_opt_mask {
39445 const char *name; /* option name */
39446 HOST_WIDE_INT mask; /* mask to set */
39447 bool invert; /* invert sense of mask */
39448 bool valid_target; /* option is a target option */
39449 };
39451 static struct rs6000_opt_mask const rs6000_opt_masks[] =
39453 { "altivec", OPTION_MASK_ALTIVEC, false, true },
39454 { "cmpb", OPTION_MASK_CMPB, false, true },
39455 { "crypto", OPTION_MASK_CRYPTO, false, true },
39456 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
39457 { "dlmzb", OPTION_MASK_DLMZB, false, true },
39458 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
39459 false, true },
39460 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
39461 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
39462 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
39463 { "fprnd", OPTION_MASK_FPRND, false, true },
39464 { "hard-dfp", OPTION_MASK_DFP, false, true },
39465 { "htm", OPTION_MASK_HTM, false, true },
39466 { "isel", OPTION_MASK_ISEL, false, true },
39467 { "mfcrf", OPTION_MASK_MFCRF, false, true },
39468 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
39469 { "modulo", OPTION_MASK_MODULO, false, true },
39470 { "mulhw", OPTION_MASK_MULHW, false, true },
39471 { "multiple", OPTION_MASK_MULTIPLE, false, true },
39472 { "popcntb", OPTION_MASK_POPCNTB, false, true },
39473 { "popcntd", OPTION_MASK_POPCNTD, false, true },
39474 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
39475 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
39476 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
39477 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
39478 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
39479 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
39480 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
39481 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
39482 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
39483 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
39484 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
39485 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
39486 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
39487 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
39488 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
39489 { "string", OPTION_MASK_STRING, false, true },
39490 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
39491 { "update", OPTION_MASK_NO_UPDATE, true , true },
39492 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
39493 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
39494 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
39495 { "vsx", OPTION_MASK_VSX, false, true },
39496 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
39497 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
39498 #ifdef OPTION_MASK_64BIT
39499 #if TARGET_AIX_OS
39500 { "aix64", OPTION_MASK_64BIT, false, false },
39501 { "aix32", OPTION_MASK_64BIT, true, false },
39502 #else
39503 { "64", OPTION_MASK_64BIT, false, false },
39504 { "32", OPTION_MASK_64BIT, true, false },
39505 #endif
39506 #endif
39507 #ifdef OPTION_MASK_EABI
39508 { "eabi", OPTION_MASK_EABI, false, false },
39509 #endif
39510 #ifdef OPTION_MASK_LITTLE_ENDIAN
39511 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
39512 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
39513 #endif
39514 #ifdef OPTION_MASK_RELOCATABLE
39515 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
39516 #endif
39517 #ifdef OPTION_MASK_STRICT_ALIGN
39518 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
39519 #endif
39520 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
39521 { "string", OPTION_MASK_STRING, false, false },
39524 /* Builtin mask mapping for printing the flags. */
39525 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39527 { "altivec", RS6000_BTM_ALTIVEC, false, false },
39528 { "vsx", RS6000_BTM_VSX, false, false },
39529 { "spe", RS6000_BTM_SPE, false, false },
39530 { "paired", RS6000_BTM_PAIRED, false, false },
39531 { "fre", RS6000_BTM_FRE, false, false },
39532 { "fres", RS6000_BTM_FRES, false, false },
39533 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
39534 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
39535 { "popcntd", RS6000_BTM_POPCNTD, false, false },
39536 { "cell", RS6000_BTM_CELL, false, false },
39537 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
39538 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
39539 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
39540 { "crypto", RS6000_BTM_CRYPTO, false, false },
39541 { "htm", RS6000_BTM_HTM, false, false },
39542 { "hard-dfp", RS6000_BTM_DFP, false, false },
39543 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
39544 { "long-double-128", RS6000_BTM_LDBL128, false, false },
39545 { "float128", RS6000_BTM_FLOAT128, false, false },
39548 /* Option variables that we want to support inside attribute((target)) and
39549 #pragma GCC target operations. */
39551 struct rs6000_opt_var {
39552 const char *name; /* option name */
39553 size_t global_offset; /* offset of the option in global_options. */
39554 size_t target_offset; /* offset of the option in target options. */
39555 };
39557 static struct rs6000_opt_var const rs6000_opt_vars[] =
39559 { "friz",
39560 offsetof (struct gcc_options, x_TARGET_FRIZ),
39561 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39562 { "avoid-indexed-addresses",
39563 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39564 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39565 { "paired",
39566 offsetof (struct gcc_options, x_rs6000_paired_float),
39567 offsetof (struct cl_target_option, x_rs6000_paired_float), },
39568 { "longcall",
39569 offsetof (struct gcc_options, x_rs6000_default_long_calls),
39570 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39571 { "optimize-swaps",
39572 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39573 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39574 { "allow-movmisalign",
39575 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39576 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39577 { "allow-df-permute",
39578 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39579 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39580 { "sched-groups",
39581 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39582 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39583 { "always-hint",
39584 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39585 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39586 { "align-branch-targets",
39587 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39588 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39589 { "vectorize-builtins",
39590 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39591 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39592 { "tls-markers",
39593 offsetof (struct gcc_options, x_tls_markers),
39594 offsetof (struct cl_target_option, x_tls_markers), },
39595 { "sched-prolog",
39596 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39597 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39598 { "sched-epilog",
39599 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39600 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39601 { "gen-cell-microcode",
39602 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39603 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39604 { "warn-cell-microcode",
39605 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39606 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39607 };
39609 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39610 parsing. Return true if there were no errors. */
39612 static bool
39613 rs6000_inner_target_options (tree args, bool attr_p)
39615 bool ret = true;
39617 if (args == NULL_TREE)
39620 else if (TREE_CODE (args) == STRING_CST)
39622 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39623 char *q;
39625 while ((q = strtok (p, ",")) != NULL)
39627 bool error_p = false;
39628 bool not_valid_p = false;
39629 const char *cpu_opt = NULL;
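/* strtok must be passed NULL on every call after the first to
continue scanning the same string.  */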
39631 p = NULL;
39632 if (strncmp (q, "cpu=", 4) == 0)
39634 int cpu_index = rs6000_cpu_name_lookup (q+4);
39635 if (cpu_index >= 0)
39636 rs6000_cpu_index = cpu_index;
39637 else
39639 error_p = true;
39640 cpu_opt = q+4;
39643 else if (strncmp (q, "tune=", 5) == 0)
39645 int tune_index = rs6000_cpu_name_lookup (q+5);
39646 if (tune_index >= 0)
39647 rs6000_tune_index = tune_index;
39648 else
39650 error_p = true;
39651 cpu_opt = q+5;
39654 else
39656 size_t i;
39657 bool invert = false;
39658 char *r = q;
39660 error_p = true;
39661 if (strncmp (r, "no-", 3) == 0)
39663 invert = true;
39664 r += 3;
39667 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39668 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39670 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39672 if (!rs6000_opt_masks[i].valid_target)
39673 not_valid_p = true;
39674 else
39676 error_p = false;
39677 rs6000_isa_flags_explicit |= mask;
39679 /* VSX needs altivec, so -mvsx automagically sets
39680 altivec and disables -mavoid-indexed-addresses. */
39681 if (!invert)
39683 if (mask == OPTION_MASK_VSX)
39685 mask |= OPTION_MASK_ALTIVEC;
39686 TARGET_AVOID_XFORM = 0;
39690 if (rs6000_opt_masks[i].invert)
39691 invert = !invert;
39693 if (invert)
39694 rs6000_isa_flags &= ~mask;
39695 else
39696 rs6000_isa_flags |= mask;
39698 break;
39701 if (error_p && !not_valid_p)
39703 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39704 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39706 size_t j = rs6000_opt_vars[i].global_offset;
39707 *((int *) ((char *)&global_options + j)) = !invert;
39708 error_p = false;
39709 not_valid_p = false;
39710 break;
39715 if (error_p)
39717 const char *eprefix, *esuffix;
39719 ret = false;
39720 if (attr_p)
39722 eprefix = "__attribute__((__target__(";
39723 esuffix = ")))";
39725 else
39727 eprefix = "#pragma GCC target ";
39728 esuffix = "";
39731 if (cpu_opt)
39732 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39733 q, esuffix);
39734 else if (not_valid_p)
39735 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39736 else
39737 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39742 else if (TREE_CODE (args) == TREE_LIST)
39746 tree value = TREE_VALUE (args);
39747 if (value)
39749 bool ret2 = rs6000_inner_target_options (value, attr_p);
39750 if (!ret2)
39751 ret = false;
39753 args = TREE_CHAIN (args);
39755 while (args != NULL_TREE);
39758 else
39760 error ("attribute %<target%> argument not a string");
39761 return false;
39764 return ret;
39767 /* Print out the target options as a list for -mdebug=target. */
39769 static void
39770 rs6000_debug_target_options (tree args, const char *prefix)
39772 if (args == NULL_TREE)
39773 fprintf (stderr, "%s<NULL>", prefix);
39775 else if (TREE_CODE (args) == STRING_CST)
39777 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39778 char *q;
39780 while ((q = strtok (p, ",")) != NULL)
39782 p = NULL;
39783 fprintf (stderr, "%s\"%s\"", prefix, q);
39784 prefix = ", ";
39788 else if (TREE_CODE (args) == TREE_LIST)
39792 tree value = TREE_VALUE (args);
39793 if (value)
39795 rs6000_debug_target_options (value, prefix);
39796 prefix = ", ";
39798 args = TREE_CHAIN (args);
39800 while (args != NULL_TREE);
39803 else
39804 gcc_unreachable ();
39806 return;
39810 /* Hook to validate attribute((target("..."))). */
39812 static bool
39813 rs6000_valid_attribute_p (tree fndecl,
39814 tree ARG_UNUSED (name),
39815 tree args,
39816 int flags)
39818 struct cl_target_option cur_target;
39819 bool ret;
39820 tree old_optimize = build_optimization_node (&global_options);
39821 tree new_target, new_optimize;
39822 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39824 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39826 if (TARGET_DEBUG_TARGET)
39828 tree tname = DECL_NAME (fndecl);
39829 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39830 if (tname)
39831 fprintf (stderr, "function: %.*s\n",
39832 (int) IDENTIFIER_LENGTH (tname),
39833 IDENTIFIER_POINTER (tname));
39834 else
39835 fprintf (stderr, "function: unknown\n");
39837 fprintf (stderr, "args:");
39838 rs6000_debug_target_options (args, " ");
39839 fprintf (stderr, "\n");
39841 if (flags)
39842 fprintf (stderr, "flags: 0x%x\n", flags);
39844 fprintf (stderr, "--------------------\n");
39847 old_optimize = build_optimization_node (&global_options);
39848 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39850 /* If the function changed the optimization levels as well as setting target
39851 options, start with the optimizations specified. */
39852 if (func_optimize && func_optimize != old_optimize)
39853 cl_optimization_restore (&global_options,
39854 TREE_OPTIMIZATION (func_optimize));
39856 /* The target attributes may also change some optimization flags, so update
39857 the optimization options if necessary. */
39858 cl_target_option_save (&cur_target, &global_options);
39859 rs6000_cpu_index = rs6000_tune_index = -1;
39860 ret = rs6000_inner_target_options (args, true);
39862 /* Set up any additional state. */
39863 if (ret)
39865 ret = rs6000_option_override_internal (false);
39866 new_target = build_target_option_node (&global_options);
39868 else
39869 new_target = NULL;
39871 new_optimize = build_optimization_node (&global_options);
39873 if (!new_target)
39874 ret = false;
39876 else if (fndecl)
39878 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39880 if (old_optimize != new_optimize)
39881 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39884 cl_target_option_restore (&global_options, &cur_target);
39886 if (old_optimize != new_optimize)
39887 cl_optimization_restore (&global_options,
39888 TREE_OPTIMIZATION (old_optimize));
39890 return ret;
39894 /* Hook to validate the current #pragma GCC target and set the state, and
39895 update the macros based on what was changed. If ARGS is NULL, then
39896 POP_TARGET is used to reset the options. */
39898 bool
39899 rs6000_pragma_target_parse (tree args, tree pop_target)
39901 tree prev_tree = build_target_option_node (&global_options);
39902 tree cur_tree;
39903 struct cl_target_option *prev_opt, *cur_opt;
39904 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39905 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39907 if (TARGET_DEBUG_TARGET)
39909 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39910 fprintf (stderr, "args:");
39911 rs6000_debug_target_options (args, " ");
39912 fprintf (stderr, "\n");
39914 if (pop_target)
39916 fprintf (stderr, "pop_target:\n");
39917 debug_tree (pop_target);
39919 else
39920 fprintf (stderr, "pop_target: <NULL>\n");
39922 fprintf (stderr, "--------------------\n");
39925 if (! args)
39927 cur_tree = ((pop_target)
39928 ? pop_target
39929 : target_option_default_node);
39930 cl_target_option_restore (&global_options,
39931 TREE_TARGET_OPTION (cur_tree));
39933 else
39935 rs6000_cpu_index = rs6000_tune_index = -1;
39936 if (!rs6000_inner_target_options (args, false)
39937 || !rs6000_option_override_internal (false)
39938 || (cur_tree = build_target_option_node (&global_options))
39939 == NULL_TREE)
39941 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39942 fprintf (stderr, "invalid pragma\n");
39944 return false;
39948 target_option_current_node = cur_tree;
39950 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39951 change the macros that are defined. */
39952 if (rs6000_target_modify_macros_ptr)
39954 prev_opt = TREE_TARGET_OPTION (prev_tree);
39955 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39956 prev_flags = prev_opt->x_rs6000_isa_flags;
39958 cur_opt = TREE_TARGET_OPTION (cur_tree);
39959 cur_flags = cur_opt->x_rs6000_isa_flags;
39960 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39962 diff_bumask = (prev_bumask ^ cur_bumask);
39963 diff_flags = (prev_flags ^ cur_flags);
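/* The XORs isolate the option bits that changed; macros for bits
that were previously set are deleted, then macros for bits now set
are defined.  */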
39965 if ((diff_flags != 0) || (diff_bumask != 0))
39967 /* Delete old macros. */
39968 rs6000_target_modify_macros_ptr (false,
39969 prev_flags & diff_flags,
39970 prev_bumask & diff_bumask);
39972 /* Define new macros. */
39973 rs6000_target_modify_macros_ptr (true,
39974 cur_flags & diff_flags,
39975 cur_bumask & diff_bumask);
39979 return true;
39983 /* Remember the last target of rs6000_set_current_function. */
39984 static GTY(()) tree rs6000_previous_fndecl;
39986 /* Establish appropriate back-end context for processing the function
39987 FNDECL. The argument might be NULL to indicate processing at top
39988 level, outside of any function scope. */
39989 static void
39990 rs6000_set_current_function (tree fndecl)
39992 tree old_tree = (rs6000_previous_fndecl
39993 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39994 : NULL_TREE);
39996 tree new_tree = (fndecl
39997 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39998 : NULL_TREE);
40000 if (TARGET_DEBUG_TARGET)
40002 bool print_final = false;
40003 fprintf (stderr, "\n==================== rs6000_set_current_function");
40005 if (fndecl)
40006 fprintf (stderr, ", fndecl %s (%p)",
40007 (DECL_NAME (fndecl)
40008 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
40009 : "<unknown>"), (void *)fndecl);
40011 if (rs6000_previous_fndecl)
40012 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
40014 fprintf (stderr, "\n");
40015 if (new_tree)
40017 fprintf (stderr, "\nnew fndecl target specific options:\n");
40018 debug_tree (new_tree);
40019 print_final = true;
40022 if (old_tree)
40024 fprintf (stderr, "\nold fndecl target specific options:\n");
40025 debug_tree (old_tree);
40026 print_final = true;
40029 if (print_final)
40030 fprintf (stderr, "--------------------\n");
40033 /* Only change the context if the function changes. This hook is called
40034 several times in the course of compiling a function, and we don't want to
40035 slow things down too much or call target_reinit when it isn't safe. */
40036 if (fndecl && fndecl != rs6000_previous_fndecl)
40038 rs6000_previous_fndecl = fndecl;
40039 if (old_tree == new_tree)
40042 else if (new_tree && new_tree != target_option_default_node)
40044 cl_target_option_restore (&global_options,
40045 TREE_TARGET_OPTION (new_tree));
40046 if (TREE_TARGET_GLOBALS (new_tree))
40047 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40048 else
40049 TREE_TARGET_GLOBALS (new_tree)
40050 = save_target_globals_default_opts ();
40053 else if (old_tree && old_tree != target_option_default_node)
40055 new_tree = target_option_current_node;
40056 cl_target_option_restore (&global_options,
40057 TREE_TARGET_OPTION (new_tree));
40058 if (TREE_TARGET_GLOBALS (new_tree))
40059 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40060 else if (new_tree == target_option_default_node)
40061 restore_target_globals (&default_target_globals);
40062 else
40063 TREE_TARGET_GLOBALS (new_tree)
40064 = save_target_globals_default_opts ();
40070 /* Save the current options */
40072 static void
40073 rs6000_function_specific_save (struct cl_target_option *ptr,
40074 struct gcc_options *opts)
40076 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
40077 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
40080 /* Restore the current options */
40082 static void
40083 rs6000_function_specific_restore (struct gcc_options *opts,
40084 struct cl_target_option *ptr)
40087 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
40088 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
40089 (void) rs6000_option_override_internal (false);
40092 /* Print the current options */
40094 static void
40095 rs6000_function_specific_print (FILE *file, int indent,
40096 struct cl_target_option *ptr)
40098 rs6000_print_isa_options (file, indent, "Isa options set",
40099 ptr->x_rs6000_isa_flags);
40101 rs6000_print_isa_options (file, indent, "Isa options explicit",
40102 ptr->x_rs6000_isa_flags_explicit);
40105 /* Helper function to print the current isa or misc options on a line. */
40107 static void
40108 rs6000_print_options_internal (FILE *file,
40109 int indent,
40110 const char *string,
40111 HOST_WIDE_INT flags,
40112 const char *prefix,
40113 const struct rs6000_opt_mask *opts,
40114 size_t num_elements)
40116 size_t i;
40117 size_t start_column = 0;
40118 size_t cur_column;
40119 size_t max_column = 120;
40120 size_t prefix_len = strlen (prefix);
40121 size_t comma_len = 0;
40122 const char *comma = "";
40124 if (indent)
40125 start_column += fprintf (file, "%*s", indent, "");
40127 if (!flags)
40129 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
40130 return;
40133 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
40135 /* Print the various mask options. */
40136 cur_column = start_column;
40137 for (i = 0; i < num_elements; i++)
40139 bool invert = opts[i].invert;
40140 const char *name = opts[i].name;
40141 const char *no_str = "";
40142 HOST_WIDE_INT mask = opts[i].mask;
40143 size_t len = comma_len + prefix_len + strlen (name);
40145 if (!invert)
40147 if ((flags & mask) == 0)
40149 no_str = "no-";
40150 len += sizeof ("no-") - 1;
40153 flags &= ~mask;
40156 else
40158 if ((flags & mask) != 0)
40160 no_str = "no-";
40161 len += sizeof ("no-") - 1;
40164 flags |= mask;
40167 cur_column += len;
40168 if (cur_column > max_column)
40170 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
40171 cur_column = start_column + len;
40172 comma = "";
40175 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
40176 comma = ", ";
40177 comma_len = sizeof (", ") - 1;
40180 fputs ("\n", file);
40183 /* Helper function to print the current isa options on a line. */
40185 static void
40186 rs6000_print_isa_options (FILE *file, int indent, const char *string,
40187 HOST_WIDE_INT flags)
40189 rs6000_print_options_internal (file, indent, string, flags, "-m",
40190 &rs6000_opt_masks[0],
40191 ARRAY_SIZE (rs6000_opt_masks));
40194 static void
40195 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
40196 HOST_WIDE_INT flags)
40198 rs6000_print_options_internal (file, indent, string, flags, "",
40199 &rs6000_builtin_mask_names[0],
40200 ARRAY_SIZE (rs6000_builtin_mask_names));
40203 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
40204 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40205 -mvsx-timode, -mupper-regs-df).
40207 If the user used -mno-power8-vector, we need to turn off all of the implicit
40208 ISA 2.07 and 3.0 options that relate to the vector unit.
40210 If the user used -mno-power9-vector, we need to turn off all of the implicit
40211 ISA 3.0 options that relate to the vector unit.
40213 This function does not handle explicit options such as the user specifying
40214 -mdirect-move. These are handled in rs6000_option_override_internal, and
40215 the appropriate error is given if needed.
40217 We return a mask of all of the implicit options that should not be enabled
40218 by default. */
40220 static HOST_WIDE_INT
40221 rs6000_disable_incompatible_switches (void)
40223 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
40224 size_t i, j;
40226 static const struct {
40227 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
40228 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
40229 const char *const name; /* name of the switch. */
40230 } flags[] = {
40231 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
40232 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
40233 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
40234 };
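/* For example, an explicit -mno-vsx combined with an explicit
-mdirect-move is diagnosed with "-mno-vsx turns off -mdirect-move",
while every dependent option that merely defaults on from VSX is
silently suppressed via the returned ignore mask.  */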
40236 for (i = 0; i < ARRAY_SIZE (flags); i++)
40238 HOST_WIDE_INT no_flag = flags[i].no_flag;
40240 if ((rs6000_isa_flags & no_flag) == 0
40241 && (rs6000_isa_flags_explicit & no_flag) != 0)
40243 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
40244 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
40245 & rs6000_isa_flags
40246 & dep_flags);
40248 if (set_flags)
40250 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
40251 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
40253 set_flags &= ~rs6000_opt_masks[j].mask;
40254 error ("-mno-%s turns off -m%s",
40255 flags[i].name,
40256 rs6000_opt_masks[j].name);
40259 gcc_assert (!set_flags);
40262 rs6000_isa_flags &= ~dep_flags;
40263 ignore_masks |= no_flag | dep_flags;
40267 if (!TARGET_P9_VECTOR
40268 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
40269 && TARGET_P9_DFORM_BOTH > 0)
40271 error ("-mno-power9-vector turns off -mpower9-dform");
40272 TARGET_P9_DFORM_BOTH = 0;
40275 return ignore_masks;
40279 /* Hook to determine if one function can safely inline another. */
40281 static bool
40282 rs6000_can_inline_p (tree caller, tree callee)
40284 bool ret = false;
40285 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
40286 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
40288 /* If callee has no option attributes, then it is ok to inline. */
40289 if (!callee_tree)
40290 ret = true;
40292 /* If caller has no option attributes, but callee does then it is not ok to
40293 inline. */
40294 else if (!caller_tree)
40295 ret = false;
40297 else
40299 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
40300 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
40302 /* Callee's options should a subset of the caller's, i.e. a vsx function
40303 can inline an altivec function but a non-vsx function can't inline a
40304 vsx function. */
40305 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
40306 == callee_opts->x_rs6000_isa_flags)
40307 ret = true;
40310 if (TARGET_DEBUG_TARGET)
40311 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
40312 (DECL_NAME (caller)
40313 ? IDENTIFIER_POINTER (DECL_NAME (caller))
40314 : "<unknown>"),
40315 (DECL_NAME (callee)
40316 ? IDENTIFIER_POINTER (DECL_NAME (callee))
40317 : "<unknown>"),
40318 (ret ? "can" : "cannot"));
40320 return ret;
40323 /* Allocate a stack temp and fixup the address so it meets the particular
40324 memory requirements (either offetable or REG+REG addressing). */
40327 rs6000_allocate_stack_temp (machine_mode mode,
40328 bool offsettable_p,
40329 bool reg_reg_p)
40331 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40332 rtx addr = XEXP (stack, 0);
40333 int strict_p = (reload_in_progress || reload_completed);
40335 if (!legitimate_indirect_address_p (addr, strict_p))
40337 if (offsettable_p
40338 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
40339 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40341 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
40342 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40345 return stack;
40348 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40349 to such a form to deal with memory reference instructions like STFIWX that
40350 only take reg+reg addressing. */
40353 rs6000_address_for_fpconvert (rtx x)
40355 int strict_p = (reload_in_progress || reload_completed);
40356 rtx addr;
40358 gcc_assert (MEM_P (x));
40359 addr = XEXP (x, 0);
40360 if (! legitimate_indirect_address_p (addr, strict_p)
40361 && ! legitimate_indexed_address_p (addr, strict_p))
40363 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
40365 rtx reg = XEXP (addr, 0);
40366 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
40367 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
40368 gcc_assert (REG_P (reg));
40369 emit_insn (gen_add3_insn (reg, reg, size_rtx));
40370 addr = reg;
40372 else if (GET_CODE (addr) == PRE_MODIFY)
40374 rtx reg = XEXP (addr, 0);
40375 rtx expr = XEXP (addr, 1);
40376 gcc_assert (REG_P (reg));
40377 gcc_assert (GET_CODE (expr) == PLUS);
40378 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
40379 addr = reg;
40382 x = replace_equiv_address (x, copy_addr_to_reg (addr));
40385 return x;
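/* For illustration (register and size hypothetical): given an 8-byte access
   through (mem (pre_inc (reg 9))), the code above first emits "addi 9,9,8"
   to perform the side effect explicitly, then rebuilds the reference as a
   plain (mem (reg)) that reg+reg-only instructions such as STFIWX accept.  */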
40388 /* Given a memory reference, if it is not in the form for altivec memory
40389 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40390 convert to the altivec format. */
40392 rtx
40393 rs6000_address_for_altivec (rtx x)
40395 gcc_assert (MEM_P (x));
40396 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
40398 rtx addr = XEXP (x, 0);
40399 int strict_p = (reload_in_progress || reload_completed);
40401 if (!legitimate_indexed_address_p (addr, strict_p)
40402 && !legitimate_indirect_address_p (addr, strict_p))
40403 addr = copy_to_mode_reg (Pmode, addr);
40405 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
40406 x = change_address (x, GET_MODE (x), addr);
40409 return x;
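/* A minimal sketch (illustrative only; this helper is not used elsewhere) of
   the effect of the AND with -16 above: AltiVec loads and stores ignore the
   low four address bits, so masking rounds the address down to a 16-byte
   boundary.  */
static inline unsigned HOST_WIDE_INT
altivec_align_down (unsigned HOST_WIDE_INT addr)
{
  /* -16 is ...11110000 in two's complement, so this clears bits 0-3.  */
  return addr & ~(unsigned HOST_WIDE_INT) 15;
}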
40412 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40414 On the RS/6000, all integer constants are acceptable, though most won't be
40415 valid for particular insns. Only easy FP constants are acceptable. */
40417 static bool
40418 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
40420 if (TARGET_ELF && tls_referenced_p (x))
40421 return false;
40423 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
40424 || GET_MODE (x) == VOIDmode
40425 || (TARGET_POWERPC64 && mode == DImode)
40426 || easy_fp_constant (x, mode)
40427 || easy_vector_constant (x, mode));
40431 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40433 static bool
40434 chain_already_loaded (rtx_insn *last)
40436 for (; last != NULL; last = PREV_INSN (last))
40438 if (NONJUMP_INSN_P (last))
40440 rtx patt = PATTERN (last);
40442 if (GET_CODE (patt) == SET)
40444 rtx lhs = XEXP (patt, 0);
40446 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
40447 return true;
40451 return false;
40454 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40456 void
40457 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40459 const bool direct_call_p
40460 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
40461 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
40462 rtx toc_load = NULL_RTX;
40463 rtx toc_restore = NULL_RTX;
40464 rtx func_addr;
40465 rtx abi_reg = NULL_RTX;
40466 rtx call[4];
40467 int n_call;
40468 rtx insn;
40470 /* Handle longcall attributes. */
40471 if (INTVAL (cookie) & CALL_LONG)
40472 func_desc = rs6000_longcall_ref (func_desc);
40474 /* Handle indirect calls. */
40475 if (GET_CODE (func_desc) != SYMBOL_REF
40476 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
40478 /* Save the TOC into its reserved slot before the call,
40479 and prepare to restore it after the call. */
40480 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
40481 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
40482 rtx stack_toc_mem = gen_frame_mem (Pmode,
40483 gen_rtx_PLUS (Pmode, stack_ptr,
40484 stack_toc_offset));
40485 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
40486 gen_rtvec (1, stack_toc_offset),
40487 UNSPEC_TOCSLOT);
40488 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
40490 /* Can we optimize saving the TOC in the prologue or
40491 do we need to do it at every call? */
40492 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
40493 cfun->machine->save_toc_in_prologue = true;
40494 else
40496 MEM_VOLATILE_P (stack_toc_mem) = 1;
40497 emit_move_insn (stack_toc_mem, toc_reg);
40500 if (DEFAULT_ABI == ABI_ELFv2)
40502 /* A function pointer in the ELFv2 ABI is just a plain address, but
40503 the ABI requires it to be loaded into r12 before the call. */
40504 func_addr = gen_rtx_REG (Pmode, 12);
40505 emit_move_insn (func_addr, func_desc);
40506 abi_reg = func_addr;
40508 else
40510 /* A function pointer under AIX is a pointer to a data area whose
40511 first word contains the actual address of the function, whose
40512 second word contains a pointer to its TOC, and whose third word
40513 contains a value to place in the static chain register (r11).
40514 Note that if we load the static chain, our "trampoline" need
40515 not have any executable code. */
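/* Illustrative layout only (the struct below is a sketch for exposition,
   not a GCC type): viewed from C, an AIX function descriptor is roughly

   struct aix_func_desc {
   void *code_addr; /- word 0: actual function address -/
   void *toc_value; /- word 1: callee's TOC pointer -/
   void *static_chain; /- word 2: value for r11 -/
   };

   The loads below use offsets 0, GET_MODE_SIZE (Pmode) and
   2 * GET_MODE_SIZE (Pmode) into this structure accordingly. */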
40517 /* Load up address of the actual function. */
40518 func_desc = force_reg (Pmode, func_desc);
40519 func_addr = gen_reg_rtx (Pmode);
40520 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
40522 /* Prepare to load the TOC of the called function. Note that the
40523 TOC load must happen immediately before the actual call so
40524 that unwinding the TOC registers works correctly. See the
40525 comment in frob_update_context. */
40526 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
40527 rtx func_toc_mem = gen_rtx_MEM (Pmode,
40528 gen_rtx_PLUS (Pmode, func_desc,
40529 func_toc_offset));
40530 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
40532 /* If we have a static chain, load it up. But, if the call was
40533 originally direct, the 3rd word has not been written since no
40534 trampoline has been built, so we ought not to load it, lest we
40535 overwrite a static chain value. */
40536 if (!direct_call_p
40537 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40538 && !chain_already_loaded (get_current_sequence ()->next->last))
40540 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
40541 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
40542 rtx func_sc_mem = gen_rtx_MEM (Pmode,
40543 gen_rtx_PLUS (Pmode, func_desc,
40544 func_sc_offset));
40545 emit_move_insn (sc_reg, func_sc_mem);
40546 abi_reg = sc_reg;
40550 else
40552 /* Direct calls use the TOC: for local calls, the callee will
40553 assume the TOC register is set; for non-local calls, the
40554 PLT stub needs the TOC register. */
40555 abi_reg = toc_reg;
40556 func_addr = func_desc;
40559 /* Create the call. */
40560 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40561 if (value != NULL_RTX)
40562 call[0] = gen_rtx_SET (value, call[0]);
40563 n_call = 1;
40565 if (toc_load)
40566 call[n_call++] = toc_load;
40567 if (toc_restore)
40568 call[n_call++] = toc_restore;
40570 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40572 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40573 insn = emit_call_insn (insn);
40575 /* Mention all registers defined by the ABI to hold information
40576 as uses in CALL_INSN_FUNCTION_USAGE. */
40577 if (abi_reg)
40578 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
40581 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40583 void
40584 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40586 rtx call[2];
40587 rtx insn;
40589 gcc_assert (INTVAL (cookie) == 0);
40591 /* Create the call. */
40592 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40593 if (value != NULL_RTX)
40594 call[0] = gen_rtx_SET (value, call[0]);
40596 call[1] = simple_return_rtx;
40598 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40599 insn = emit_call_insn (insn);
40601 /* Note use of the TOC register. */
40602 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40605 /* Return whether we always need to update the saved TOC pointer when we update
40606 the stack pointer. */
40608 static bool
40609 rs6000_save_toc_in_prologue_p (void)
40611 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40614 #ifdef HAVE_GAS_HIDDEN
40615 # define USE_HIDDEN_LINKONCE 1
40616 #else
40617 # define USE_HIDDEN_LINKONCE 0
40618 #endif
40620 /* Fills in the label name that should be used for a 476 link stack thunk. */
40622 void
40623 get_ppc476_thunk_name (char name[32])
40625 gcc_assert (TARGET_LINK_STACK);
40627 if (USE_HIDDEN_LINKONCE)
40628 sprintf (name, "__ppc476.get_thunk");
40629 else
40630 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40633 /* This function emits the simple thunk routine that is used to preserve
40634 the link stack on the 476 cpu. */
40636 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40637 static void
40638 rs6000_code_end (void)
40640 char name[32];
40641 tree decl;
40643 if (!TARGET_LINK_STACK)
40644 return;
40646 get_ppc476_thunk_name (name);
40648 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40649 build_function_type_list (void_type_node, NULL_TREE));
40650 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40651 NULL_TREE, void_type_node);
40652 TREE_PUBLIC (decl) = 1;
40653 TREE_STATIC (decl) = 1;
40655 #if RS6000_WEAK
40656 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40658 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40659 targetm.asm_out.unique_section (decl, 0);
40660 switch_to_section (get_named_section (decl, NULL, 0));
40661 DECL_WEAK (decl) = 1;
40662 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40663 targetm.asm_out.globalize_label (asm_out_file, name);
40664 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40665 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40667 else
40668 #endif
40670 switch_to_section (text_section);
40671 ASM_OUTPUT_LABEL (asm_out_file, name);
40674 DECL_INITIAL (decl) = make_node (BLOCK);
40675 current_function_decl = decl;
40676 allocate_struct_function (decl, false);
40677 init_function_start (decl);
40678 first_function_block_is_cold = false;
40679 /* Make sure unwind info is emitted for the thunk if needed. */
40680 final_start_function (emit_barrier (), asm_out_file, 1);
40682 fputs ("\tblr\n", asm_out_file);
40684 final_end_function ();
40685 init_insn_lengths ();
40686 free_after_compilation (cfun);
40687 set_cfun (NULL);
40688 current_function_decl = NULL;
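/* For reference (a sketch; exact directives vary by target and assembler),
   with hidden linkonce support the emitted thunk is just:

   .weak __ppc476.get_thunk
   .hidden __ppc476.get_thunk
   __ppc476.get_thunk:
   blr

   Reaching it with "bl" and returning through its "blr" keeps the 476 link
   stack balanced, which is the point of the thunk. */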
40691 /* Add r30 to hard reg set if the prologue sets it up and it is not
40692 pic_offset_table_rtx. */
40694 static void
40695 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40697 if (!TARGET_SINGLE_PIC_BASE
40698 && TARGET_TOC
40699 && TARGET_MINIMAL_TOC
40700 && !constant_pool_empty_p ())
40701 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40702 if (cfun->machine->split_stack_argp_used)
40703 add_to_hard_reg_set (&set->set, Pmode, 12);
40707 /* Helper function for rs6000_split_logical to emit a logical instruction after
40708 splitting the operation into single GPR registers.
40710 DEST is the destination register.
40711 OP1 and OP2 are the input source registers.
40712 CODE is the base operation (AND, IOR, XOR, NOT).
40713 MODE is the machine mode.
40714 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40715 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40716 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40718 static void
40719 rs6000_split_logical_inner (rtx dest,
40720 rtx op1,
40721 rtx op2,
40722 enum rtx_code code,
40723 machine_mode mode,
40724 bool complement_final_p,
40725 bool complement_op1_p,
40726 bool complement_op2_p)
40728 rtx bool_rtx;
40730 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40731 if (op2 && GET_CODE (op2) == CONST_INT
40732 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40733 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40735 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40736 HOST_WIDE_INT value = INTVAL (op2) & mask;
40738 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40739 if (code == AND)
40741 if (value == 0)
40743 emit_insn (gen_rtx_SET (dest, const0_rtx));
40744 return;
40747 else if (value == mask)
40749 if (!rtx_equal_p (dest, op1))
40750 emit_insn (gen_rtx_SET (dest, op1));
40751 return;
40755 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40756 into separate ORI/ORIS or XORI/XORIS instructions. */
40757 else if (code == IOR || code == XOR)
40759 if (value == 0)
40761 if (!rtx_equal_p (dest, op1))
40762 emit_insn (gen_rtx_SET (dest, op1));
40763 return;
40768 if (code == AND && mode == SImode
40769 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40771 emit_insn (gen_andsi3 (dest, op1, op2));
40772 return;
40775 if (complement_op1_p)
40776 op1 = gen_rtx_NOT (mode, op1);
40778 if (complement_op2_p)
40779 op2 = gen_rtx_NOT (mode, op2);
40781 /* For canonical RTL, if only one arm is inverted it is the first. */
40782 if (!complement_op1_p && complement_op2_p)
40783 std::swap (op1, op2);
40785 bool_rtx = ((code == NOT)
40786 ? gen_rtx_NOT (mode, op1)
40787 : gen_rtx_fmt_ee (code, mode, op1, op2));
40789 if (complement_final_p)
40790 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40792 emit_insn (gen_rtx_SET (dest, bool_rtx));
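/* Worked example of the canonicalization above: with COMPLEMENT_OP2_P set,
   the raw result would be (and a (not b)); the swap rewrites it as
   (and (not b) a), since canonical RTL puts the inverted arm first, which
   is the form the ANDC/ORC/EQV machine patterns match. */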
40795 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40796 operations are split immediately during RTL generation to allow for more
40797 optimizations of the AND/IOR/XOR.
40799 OPERANDS is an array containing the destination and two input operands.
40800 CODE is the base operation (AND, IOR, XOR, NOT).
40801 MODE is the machine mode.
40802 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40803 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40804 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
40805 CLOBBER_REG is either NULL or a scratch register of type CC to allow
40806 formation of the AND instructions. */
40808 static void
40809 rs6000_split_logical_di (rtx operands[3],
40810 enum rtx_code code,
40811 bool complement_final_p,
40812 bool complement_op1_p,
40813 bool complement_op2_p)
40815 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40816 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40817 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40818 enum hi_lo { hi = 0, lo = 1 };
40819 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40820 size_t i;
40822 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40823 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40824 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40825 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40827 if (code == NOT)
40828 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40829 else
40831 if (GET_CODE (operands[2]) != CONST_INT)
40833 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40834 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40836 else
40838 HOST_WIDE_INT value = INTVAL (operands[2]);
40839 HOST_WIDE_INT value_hi_lo[2];
40841 gcc_assert (!complement_final_p);
40842 gcc_assert (!complement_op1_p);
40843 gcc_assert (!complement_op2_p);
40845 value_hi_lo[hi] = value >> 32;
40846 value_hi_lo[lo] = value & lower_32bits;
40848 for (i = 0; i < 2; i++)
40850 HOST_WIDE_INT sub_value = value_hi_lo[i];
40852 if (sub_value & sign_bit)
40853 sub_value |= upper_32bits;
40855 op2_hi_lo[i] = GEN_INT (sub_value);
40857 /* If this is an AND instruction, check to see if we need to load
40858 the value in a register. */
40859 if (code == AND && sub_value != -1 && sub_value != 0
40860 && !and_operand (op2_hi_lo[i], SImode))
40861 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40866 for (i = 0; i < 2; i++)
40868 /* Split large IOR/XOR operations. */
40869 if ((code == IOR || code == XOR)
40870 && GET_CODE (op2_hi_lo[i]) == CONST_INT
40871 && !complement_final_p
40872 && !complement_op1_p
40873 && !complement_op2_p
40874 && !logical_const_operand (op2_hi_lo[i], SImode))
40876 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40877 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40878 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40879 rtx tmp = gen_reg_rtx (SImode);
40881 /* Make sure the constant is sign extended. */
40882 if ((hi_16bits & sign_bit) != 0)
40883 hi_16bits |= upper_32bits;
40885 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40886 code, SImode, false, false, false);
40888 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40889 code, SImode, false, false, false);
40891 else
40892 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40893 code, SImode, complement_final_p,
40894 complement_op1_p, complement_op2_p);
40897 return;
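/* A self-contained sketch (illustrative; this helper is not used by the code
   above) of how a 64-bit constant is split into two sign-extended 32-bit
   halves for the SImode sub-operations. */
static inline void
split_di_value (HOST_WIDE_INT value, HOST_WIDE_INT *hi32, HOST_WIDE_INT *lo32)
{
  *hi32 = value >> 32; /* Arithmetic shift keeps the sign. */
  *lo32 = value & HOST_WIDE_INT_C(0xffffffff);
  if (*lo32 & HOST_WIDE_INT_C(0x80000000)) /* Sign-extend the low half. */
    *lo32 |= ~HOST_WIDE_INT_C(0xffffffff);
}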
40900 /* Split the insns that make up boolean operations operating on multiple GPR
40901 registers. The boolean MD patterns ensure that the inputs either are
40902 exactly the same as the output registers, or there is no overlap.
40904 OPERANDS is an array containing the destination and two input operands.
40905 CODE is the base operation (AND, IOR, XOR, NOT).
40906 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40907 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40908 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40910 void
40911 rs6000_split_logical (rtx operands[3],
40912 enum rtx_code code,
40913 bool complement_final_p,
40914 bool complement_op1_p,
40915 bool complement_op2_p)
40917 machine_mode mode = GET_MODE (operands[0]);
40918 machine_mode sub_mode;
40919 rtx op0, op1, op2;
40920 int sub_size, regno0, regno1, nregs, i;
40922 /* If this is DImode, use the specialized version that can run before
40923 register allocation. */
40924 if (mode == DImode && !TARGET_POWERPC64)
40926 rs6000_split_logical_di (operands, code, complement_final_p,
40927 complement_op1_p, complement_op2_p);
40928 return;
40931 op0 = operands[0];
40932 op1 = operands[1];
40933 op2 = (code == NOT) ? NULL_RTX : operands[2];
40934 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40935 sub_size = GET_MODE_SIZE (sub_mode);
40936 regno0 = REGNO (op0);
40937 regno1 = REGNO (op1);
40939 gcc_assert (reload_completed);
40940 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40941 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40943 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40944 gcc_assert (nregs > 1);
40946 if (op2 && REG_P (op2))
40947 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40949 for (i = 0; i < nregs; i++)
40951 int offset = i * sub_size;
40952 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40953 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40954 rtx sub_op2 = ((code == NOT)
40955 ? NULL_RTX
40956 : simplify_subreg (sub_mode, op2, mode, offset));
40958 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40959 complement_final_p, complement_op1_p,
40960 complement_op2_p);
40963 return;
40967 /* Return true if the peephole2 can combine an addis instruction with a load
40968 that uses an offset, such that the pair can be fused together on
40969 a power8. */
40971 bool
40972 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40973 rtx addis_value, /* addis value. */
40974 rtx target, /* target register that is loaded. */
40975 rtx mem) /* bottom part of the memory addr. */
40977 rtx addr;
40978 rtx base_reg;
40980 /* Validate arguments. */
40981 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40982 return false;
40984 if (!base_reg_operand (target, GET_MODE (target)))
40985 return false;
40987 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40988 return false;
40990 /* Allow sign/zero extension. */
40991 if (GET_CODE (mem) == ZERO_EXTEND
40992 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40993 mem = XEXP (mem, 0);
40995 if (!MEM_P (mem))
40996 return false;
40998 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40999 return false;
41001 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41002 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
41003 return false;
41005 /* Validate that the register used to load the high value is either the
41006 register being loaded, or we can safely replace its use.
41008 This function is only called from the peephole2 pass and we assume that
41009 there are 2 instructions in the peephole (addis and load), so we want to
41010 check if the target register was not used in the memory address and the
41011 register to hold the addis result is dead after the peephole. */
41012 if (REGNO (addis_reg) != REGNO (target))
41014 if (reg_mentioned_p (target, mem))
41015 return false;
41017 if (!peep2_reg_dead_p (2, addis_reg))
41018 return false;
41020 /* If the target register being loaded is the stack pointer, we must
41021 avoid loading any other value into it, even temporarily. */
41022 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
41023 return false;
41026 base_reg = XEXP (addr, 0);
41027 return REGNO (addis_reg) == REGNO (base_reg);
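/* For reference, a hedged example of a two-insn sequence this predicate
   accepts (register numbers and the symbol are hypothetical):

   addis 9,2,.LC0@toc@ha
   lwz 9,.LC0@toc@l(9)

   Either the addis register must equal the loaded register, as here, or it
   must be unused in the memory address and dead after the pair. */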
41030 /* During the peephole2 pass, adjust and expand the insns for a load fusion
41031 sequence. We adjust the addis register to use the target register. If the
41032 load sign extends, we adjust the code to do a zero-extending load followed
41033 by an explicit sign extension, since the fusion only covers zero-extending
41034 loads.
41036 The operands are:
41037 operands[0] register set with addis (to be replaced with target)
41038 operands[1] value set via addis
41039 operands[2] target register being loaded
41040 operands[3] D-form memory reference using operands[0]. */
41042 void
41043 expand_fusion_gpr_load (rtx *operands)
41045 rtx addis_value = operands[1];
41046 rtx target = operands[2];
41047 rtx orig_mem = operands[3];
41048 rtx new_addr, new_mem, orig_addr, offset;
41049 enum rtx_code plus_or_lo_sum;
41050 machine_mode target_mode = GET_MODE (target);
41051 machine_mode extend_mode = target_mode;
41052 machine_mode ptr_mode = Pmode;
41053 enum rtx_code extend = UNKNOWN;
41055 if (GET_CODE (orig_mem) == ZERO_EXTEND
41056 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
41058 extend = GET_CODE (orig_mem);
41059 orig_mem = XEXP (orig_mem, 0);
41060 target_mode = GET_MODE (orig_mem);
41063 gcc_assert (MEM_P (orig_mem));
41065 orig_addr = XEXP (orig_mem, 0);
41066 plus_or_lo_sum = GET_CODE (orig_addr);
41067 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41069 offset = XEXP (orig_addr, 1);
41070 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41071 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41073 if (extend != UNKNOWN)
41074 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
41076 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41077 UNSPEC_FUSION_GPR);
41078 emit_insn (gen_rtx_SET (target, new_mem));
41080 if (extend == SIGN_EXTEND)
41082 int sub_off = ((BYTES_BIG_ENDIAN)
41083 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
41084 : 0);
41085 rtx sign_reg
41086 = simplify_subreg (target_mode, target, extend_mode, sub_off);
41088 emit_insn (gen_rtx_SET (target,
41089 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
41092 return;
41095 /* Emit the addis instruction that will be part of a fused instruction
41096 sequence. */
41098 void
41099 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
41100 const char *mode_name)
41102 rtx fuse_ops[10];
41103 char insn_template[80];
41104 const char *addis_str = NULL;
41105 const char *comment_str = ASM_COMMENT_START;
41107 if (*comment_str == ' ')
41108 comment_str++;
41110 /* Emit the addis instruction. */
41111 fuse_ops[0] = target;
41112 if (satisfies_constraint_L (addis_value))
41114 fuse_ops[1] = addis_value;
41115 addis_str = "lis %0,%v1";
41118 else if (GET_CODE (addis_value) == PLUS)
41120 rtx op0 = XEXP (addis_value, 0);
41121 rtx op1 = XEXP (addis_value, 1);
41123 if (REG_P (op0) && CONST_INT_P (op1)
41124 && satisfies_constraint_L (op1))
41126 fuse_ops[1] = op0;
41127 fuse_ops[2] = op1;
41128 addis_str = "addis %0,%1,%v2";
41132 else if (GET_CODE (addis_value) == HIGH)
41134 rtx value = XEXP (addis_value, 0);
41135 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
41137 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
41138 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
41139 if (TARGET_ELF)
41140 addis_str = "addis %0,%2,%1@toc@ha";
41142 else if (TARGET_XCOFF)
41143 addis_str = "addis %0,%1@u(%2)";
41145 else
41146 gcc_unreachable ();
41149 else if (GET_CODE (value) == PLUS)
41151 rtx op0 = XEXP (value, 0);
41152 rtx op1 = XEXP (value, 1);
41154 if (GET_CODE (op0) == UNSPEC
41155 && XINT (op0, 1) == UNSPEC_TOCREL
41156 && CONST_INT_P (op1))
41158 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
41159 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
41160 fuse_ops[3] = op1;
41161 if (TARGET_ELF)
41162 addis_str = "addis %0,%2,%1+%3@toc@ha";
41164 else if (TARGET_XCOFF)
41165 addis_str = "addis %0,%1+%3@u(%2)";
41167 else
41168 gcc_unreachable ();
41172 else if (satisfies_constraint_L (value))
41174 fuse_ops[1] = value;
41175 addis_str = "lis %0,%v1";
41178 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
41180 fuse_ops[1] = value;
41181 addis_str = "lis %0,%1@ha";
41185 if (!addis_str)
41186 fatal_insn ("Could not generate addis value for fusion", addis_value);
41188 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
41189 comment, mode_name);
41190 output_asm_insn (insn_template, fuse_ops);
41193 /* Emit a D-form load or store instruction that is the second instruction
41194 of a fusion sequence. */
41196 void
41197 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
41198 const char *insn_str)
41200 rtx fuse_ops[10];
41201 char insn_template[80];
41203 fuse_ops[0] = load_store_reg;
41204 fuse_ops[1] = addis_reg;
41206 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
41208 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
41209 fuse_ops[2] = offset;
41210 output_asm_insn (insn_template, fuse_ops);
41213 else if (GET_CODE (offset) == UNSPEC
41214 && XINT (offset, 1) == UNSPEC_TOCREL)
41216 if (TARGET_ELF)
41217 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
41219 else if (TARGET_XCOFF)
41220 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41222 else
41223 gcc_unreachable ();
41225 fuse_ops[2] = XVECEXP (offset, 0, 0);
41226 output_asm_insn (insn_template, fuse_ops);
41229 else if (GET_CODE (offset) == PLUS
41230 && GET_CODE (XEXP (offset, 0)) == UNSPEC
41231 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
41232 && CONST_INT_P (XEXP (offset, 1)))
41234 rtx tocrel_unspec = XEXP (offset, 0);
41235 if (TARGET_ELF)
41236 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
41238 else if (TARGET_XCOFF)
41239 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
41241 else
41242 gcc_unreachable ();
41244 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
41245 fuse_ops[3] = XEXP (offset, 1);
41246 output_asm_insn (insn_template, fuse_ops);
41249 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
41251 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41253 fuse_ops[2] = offset;
41254 output_asm_insn (insn_template, fuse_ops);
41257 else
41258 fatal_insn ("Unable to generate load/store offset for fusion", offset);
41260 return;
41263 /* Wrap a TOC address that can be fused to indicate that special fusion
41264 processing is needed. */
41266 static rtx
41267 fusion_wrap_memory_address (rtx old_mem)
41269 rtx old_addr = XEXP (old_mem, 0);
41270 rtvec v = gen_rtvec (1, old_addr);
41271 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
41272 return replace_equiv_address_nv (old_mem, new_addr, false);
41275 /* Given an address, convert it into the addis and load offset parts. Addresses
41276 created during the peephole2 process look like:
41277 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41278 (unspec [(...)] UNSPEC_TOCREL))
41280 Addresses created via toc fusion look like:
41281 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41283 static void
41284 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
41286 rtx hi, lo;
41288 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
41290 lo = XVECEXP (addr, 0, 0);
41291 hi = gen_rtx_HIGH (Pmode, lo);
41293 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
41295 hi = XEXP (addr, 0);
41296 lo = XEXP (addr, 1);
41298 else
41299 gcc_unreachable ();
41301 *p_hi = hi;
41302 *p_lo = lo;
41305 /* Return a string to fuse an addis instruction with a gpr load into the same
41306 register that the addis instruction set. The address that is used
41307 is the logical address that was formed during peephole2:
41308 (lo_sum (high) (low-part))
41310 Or the address is the TOC address that is wrapped before register allocation:
41311 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41313 The code is complicated, so we call output_asm_insn directly, and just
41314 return "". */
41316 const char *
41317 emit_fusion_gpr_load (rtx target, rtx mem)
41319 rtx addis_value;
41320 rtx addr;
41321 rtx load_offset;
41322 const char *load_str = NULL;
41323 const char *mode_name = NULL;
41324 machine_mode mode;
41326 if (GET_CODE (mem) == ZERO_EXTEND)
41327 mem = XEXP (mem, 0);
41329 gcc_assert (REG_P (target) && MEM_P (mem));
41331 addr = XEXP (mem, 0);
41332 fusion_split_address (addr, &addis_value, &load_offset);
41334 /* Now emit the load instruction to the same register. */
41335 mode = GET_MODE (mem);
41336 switch (mode)
41338 case E_QImode:
41339 mode_name = "char";
41340 load_str = "lbz";
41341 break;
41343 case E_HImode:
41344 mode_name = "short";
41345 load_str = "lhz";
41346 break;
41348 case E_SImode:
41349 case E_SFmode:
41350 mode_name = (mode == SFmode) ? "float" : "int";
41351 load_str = "lwz";
41352 break;
41354 case E_DImode:
41355 case E_DFmode:
41356 gcc_assert (TARGET_POWERPC64);
41357 mode_name = (mode == DFmode) ? "double" : "long";
41358 load_str = "ld";
41359 break;
41361 default:
41362 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
41365 /* Emit the addis instruction. */
41366 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
41368 /* Emit the D-form load instruction. */
41369 emit_fusion_load_store (target, target, load_offset, load_str);
41371 return "";
41375 /* Return true if the peephole2 can combine a load/store involving a
41376 combination of an addis instruction and the memory operation; this fusion
41377 was added in ISA 3.0 (power9) hardware. */
41379 bool
41380 fusion_p9_p (rtx addis_reg, /* register set via addis. */
41381 rtx addis_value, /* addis value. */
41382 rtx dest, /* destination (memory or register). */
41383 rtx src) /* source (register or memory). */
41385 rtx addr, mem, offset;
41386 machine_mode mode = GET_MODE (src);
41388 /* Validate arguments. */
41389 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
41390 return false;
41392 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
41393 return false;
41395 /* Ignore extend operations that are part of the load. */
41396 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
41397 src = XEXP (src, 0);
41399 /* Test for memory<-register or register<-memory. */
41400 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
41402 if (!MEM_P (dest))
41403 return false;
41405 mem = dest;
41408 else if (MEM_P (src))
41410 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
41411 return false;
41413 mem = src;
41416 else
41417 return false;
41419 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41420 if (GET_CODE (addr) == PLUS)
41422 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41423 return false;
41425 return satisfies_constraint_I (XEXP (addr, 1));
41428 else if (GET_CODE (addr) == LO_SUM)
41430 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41431 return false;
41433 offset = XEXP (addr, 1);
41434 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
41435 return small_toc_ref (offset, GET_MODE (offset));
41437 else if (TARGET_ELF && !TARGET_POWERPC64)
41438 return CONSTANT_P (offset);
41441 return false;
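/* A hedged example of a power9 fused pair this predicate allows (registers
   and symbol hypothetical). Unlike the power8 form, the destination need not
   match the addis register -- the peephole instead requires the addis
   register to be dead afterwards -- and FP/vector registers are allowed:

   addis 9,2,.LC0@toc@ha
   lfd 1,.LC0@toc@l(9) */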
41444 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41445 load sequence.
41447 The operands are:
41448 operands[0] register set with addis
41449 operands[1] value set via addis
41450 operands[2] target register being loaded
41451 operands[3] D-form memory reference using operands[0].
41453 This is similar to the fusion introduced with power8, except it scales to
41454 both loads/stores and does not require the result register to be the same as
41455 the base register. At the moment, we only do this if the register set with
41456 addis is dead. */
41458 void
41459 expand_fusion_p9_load (rtx *operands)
41461 rtx tmp_reg = operands[0];
41462 rtx addis_value = operands[1];
41463 rtx target = operands[2];
41464 rtx orig_mem = operands[3];
41465 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
41466 enum rtx_code plus_or_lo_sum;
41467 machine_mode target_mode = GET_MODE (target);
41468 machine_mode extend_mode = target_mode;
41469 machine_mode ptr_mode = Pmode;
41470 enum rtx_code extend = UNKNOWN;
41472 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
41474 extend = GET_CODE (orig_mem);
41475 orig_mem = XEXP (orig_mem, 0);
41476 target_mode = GET_MODE (orig_mem);
41479 gcc_assert (MEM_P (orig_mem));
41481 orig_addr = XEXP (orig_mem, 0);
41482 plus_or_lo_sum = GET_CODE (orig_addr);
41483 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41485 offset = XEXP (orig_addr, 1);
41486 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41487 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41489 if (extend != UNKNOWN)
41490 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
41492 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41493 UNSPEC_FUSION_P9);
41495 set = gen_rtx_SET (target, new_mem);
41496 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41497 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41498 emit_insn (insn);
41500 return;
41503 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41504 store sequence.
41506 The operands are:
41507 operands[0] register set with addis
41508 operands[1] value set via addis
41509 operands[2] target D-form memory being stored to
41510 operands[3] register being stored
41512 This is similar to the fusion introduced with power8, except it scales to
41513 both loads/stores and does not require the result register to be the same as
41514 the base register. At the moment, we only do this if the register set with
41515 addis is dead. */
41517 void
41518 expand_fusion_p9_store (rtx *operands)
41520 rtx tmp_reg = operands[0];
41521 rtx addis_value = operands[1];
41522 rtx orig_mem = operands[2];
41523 rtx src = operands[3];
41524 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
41525 enum rtx_code plus_or_lo_sum;
41526 machine_mode target_mode = GET_MODE (orig_mem);
41527 machine_mode ptr_mode = Pmode;
41529 gcc_assert (MEM_P (orig_mem));
41531 orig_addr = XEXP (orig_mem, 0);
41532 plus_or_lo_sum = GET_CODE (orig_addr);
41533 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41535 offset = XEXP (orig_addr, 1);
41536 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41537 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41539 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
41540 UNSPEC_FUSION_P9);
41542 set = gen_rtx_SET (new_mem, new_src);
41543 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41544 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41545 emit_insn (insn);
41547 return;
41550 /* Return a string to fuse an addis instruction with a load using extended
41551 fusion. The address that is used is the logical address that was formed
41552 during peephole2: (lo_sum (high) (low-part))
41554 The code is complicated, so we call output_asm_insn directly, and just
41555 return "". */
41557 const char *
41558 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41560 machine_mode mode = GET_MODE (reg);
41561 rtx hi;
41562 rtx lo;
41563 rtx addr;
41564 const char *load_string;
41565 int r;
41567 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41569 mem = XEXP (mem, 0);
41570 mode = GET_MODE (mem);
41573 if (GET_CODE (reg) == SUBREG)
41575 gcc_assert (SUBREG_BYTE (reg) == 0);
41576 reg = SUBREG_REG (reg);
41579 if (!REG_P (reg))
41580 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41582 r = REGNO (reg);
41583 if (FP_REGNO_P (r))
41585 if (mode == SFmode)
41586 load_string = "lfs";
41587 else if (mode == DFmode || mode == DImode)
41588 load_string = "lfd";
41589 else
41590 gcc_unreachable ();
41592 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41594 if (mode == SFmode)
41595 load_string = "lxssp";
41596 else if (mode == DFmode || mode == DImode)
41597 load_string = "lxsd";
41598 else
41599 gcc_unreachable ();
41601 else if (INT_REGNO_P (r))
41603 switch (mode)
41605 case E_QImode:
41606 load_string = "lbz";
41607 break;
41608 case E_HImode:
41609 load_string = "lhz";
41610 break;
41611 case E_SImode:
41612 case E_SFmode:
41613 load_string = "lwz";
41614 break;
41615 case E_DImode:
41616 case E_DFmode:
41617 if (!TARGET_POWERPC64)
41618 gcc_unreachable ();
41619 load_string = "ld";
41620 break;
41621 default:
41622 gcc_unreachable ();
41625 else
41626 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41628 if (!MEM_P (mem))
41629 fatal_insn ("emit_fusion_p9_load not MEM", mem);
41631 addr = XEXP (mem, 0);
41632 fusion_split_address (addr, &hi, &lo);
41634 /* Emit the addis instruction. */
41635 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41637 /* Emit the D-form load instruction. */
41638 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41640 return "";
41643 /* Return a string to fuse an addis instruction with a store using extended
41644 fusion. The address that is used is the logical address that was formed
41645 during peephole2: (lo_sum (high) (low-part))
41647 The code is complicated, so we call output_asm_insn directly, and just
41648 return "". */
41650 const char *
41651 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41653 machine_mode mode = GET_MODE (reg);
41654 rtx hi;
41655 rtx lo;
41656 rtx addr;
41657 const char *store_string;
41658 int r;
41660 if (GET_CODE (reg) == SUBREG)
41662 gcc_assert (SUBREG_BYTE (reg) == 0);
41663 reg = SUBREG_REG (reg);
41666 if (!REG_P (reg))
41667 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41669 r = REGNO (reg);
41670 if (FP_REGNO_P (r))
41672 if (mode == SFmode)
41673 store_string = "stfs";
41674 else if (mode == DFmode)
41675 store_string = "stfd";
41676 else
41677 gcc_unreachable ();
41679 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41681 if (mode == SFmode)
41682 store_string = "stxssp";
41683 else if (mode == DFmode || mode == DImode)
41684 store_string = "stxsd";
41685 else
41686 gcc_unreachable ();
41688 else if (INT_REGNO_P (r))
41690 switch (mode)
41692 case E_QImode:
41693 store_string = "stb";
41694 break;
41695 case E_HImode:
41696 store_string = "sth";
41697 break;
41698 case E_SImode:
41699 case E_SFmode:
41700 store_string = "stw";
41701 break;
41702 case E_DImode:
41703 case E_DFmode:
41704 if (!TARGET_POWERPC64)
41705 gcc_unreachable ();
41706 store_string = "std";
41707 break;
41708 default:
41709 gcc_unreachable ();
41712 else
41713 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41715 if (!MEM_P (mem))
41716 fatal_insn ("emit_fusion_p9_store not MEM", mem);
41718 addr = XEXP (mem, 0);
41719 fusion_split_address (addr, &hi, &lo);
41721 /* Emit the addis instruction. */
41722 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41724 /* Emit the D-form store instruction. */
41725 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41727 return "";
41731 /* Analyze vector computations and remove unnecessary doubleword
41732 swaps (xxswapdi instructions). This pass is performed only
41733 for little-endian VSX code generation.
41735 For this specific case, loads and stores of 4x32 and 2x64 vectors
41736 are inefficient. These are implemented using the lxvd2x and
41737 stxvd2x instructions, which invert the order of doublewords in
41738 a vector register. Thus the code generation inserts an xxswapdi
41739 after each such load, and prior to each such store. (For spill
41740 code after register assignment, an additional xxswapdi is inserted
41741 following each store in order to return a hard register to its
41742 unpermuted value.)
41744 The extra xxswapdi instructions reduce performance. This can be
41745 particularly bad for vectorized code. The purpose of this pass
41746 is to reduce the number of xxswapdi instructions required for
41747 correctness.
41749 The primary insight is that much code that operates on vectors
41750 does not care about the relative order of elements in a register,
41751 so long as the correct memory order is preserved. If we have
41752 a computation where all input values are provided by lxvd2x/xxswapdi
41753 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41754 and all intermediate computations are pure SIMD (independent of
41755 element order), then all the xxswapdi's associated with the loads
41756 and stores may be removed.
41758 This pass uses some of the infrastructure and logical ideas from
41759 the "web" pass in web.c. We create maximal webs of computations
41760 fitting the description above using union-find. Each such web is
41761 then optimized by removing its unnecessary xxswapdi instructions.
41763 The pass is placed prior to global optimization so that we can
41764 perform the optimization in the safest and simplest way possible;
41765 that is, by replacing each xxswapdi insn with a register copy insn.
41766 Subsequent forward propagation will remove copies where possible.
41768 There are some operations sensitive to element order for which we
41769 can still allow the operation, provided we modify those operations.
41770 These include CONST_VECTORs, for which we must swap the first and
41771 second halves of the constant vector; and SUBREGs, for which we
41772 must adjust the byte offset to account for the swapped doublewords.
41773 A remaining opportunity would be non-immediate-form splats, for
41774 which we should adjust the selected lane of the input. We should
41775 also make code generation adjustments for sum-across operations,
41776 since this is a common vectorizer reduction.
41778 Because we run prior to the first split, we can see loads and stores
41779 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41780 vector loads and stores that have not yet been split into a permuting
41781 load/store and a swap. (One way this can happen is with a builtin
41782 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41783 than deleting a swap, we convert the load/store into a permuting
41784 load/store (which effectively removes the swap). */
41786 /* Notes on Permutes
41788 We do not currently handle computations that contain permutes. There
41789 is a general transformation that can be performed correctly, but it
41790 may introduce more expensive code than it replaces. To handle these
41791 would require a cost model to determine when to perform the optimization.
41792 This commentary records how this could be done if desired.
41794 The most general permute is something like this (example for V16QI):
41796 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41797 (parallel [(const_int a0) (const_int a1)
41799 (const_int a14) (const_int a15)]))
41801 where a0,...,a15 are in [0,31] and select elements from op1 and op2
41802 to produce in the result.
41804 Regardless of mode, we can convert the PARALLEL to a mask of 16
41805 byte-element selectors. Let's call this M, with M[i] representing
41806 the ith byte-element selector value. Then if we swap doublewords
41807 throughout the computation, we can get correct behavior by replacing
41808 M with M' as follows:
41810 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41811 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41813 This seems promising at first, since we are just replacing one mask
41814 with another. But certain masks are preferable to others. If M
41815 is a mask that matches a vmrghh pattern, for example, M' certainly
41816 will not. Instead of a single vmrghh, we would generate a load of
41817 M' and a vperm. So we would need to know how many xxswapd's we can
41818 remove as a result of this transformation to determine if it's
41819 profitable; and preferably the logic would need to be aware of all
41820 the special preferable masks.
41822 Another form of permute is an UNSPEC_VPERM, in which the mask is
41823 already in a register. In some cases, this mask may be a constant
41824 that we can discover with ud-chains, in which case the above
41825 transformation is ok. However, the common usage here is for the
41826 mask to be produced by an UNSPEC_LVSL, in which case the mask
41827 cannot be known at compile time. In such a case we would have to
41828 generate several instructions to compute M' as above at run time,
41829 and a cost model is needed again.
41831 However, when the mask M for an UNSPEC_VPERM is loaded from the
41832 constant pool, we can replace M with M' as above at no cost
41833 beyond adding a constant pool entry. */
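/* A minimal sketch (illustrative only; not used by the pass) of the selector
   remapping M -> M' described above, for byte-element selectors in [0,31]. */
static inline unsigned int
swapped_selector (unsigned int m)
{
  /* Rotate by 8 within each 16-element half, preserving whether the
     selector refers to op1 (0-15) or op2 (16-31). */
  return (m < 16) ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}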
41835 /* This is based on the union-find logic in web.c. web_entry_base is
41836 defined in df.h. */
41837 class swap_web_entry : public web_entry_base
41839 public:
41840 /* Pointer to the insn. */
41841 rtx_insn *insn;
41842 /* Set if insn contains a mention of a vector register. All other
41843 fields are undefined if this field is unset. */
41844 unsigned int is_relevant : 1;
41845 /* Set if insn is a load. */
41846 unsigned int is_load : 1;
41847 /* Set if insn is a store. */
41848 unsigned int is_store : 1;
41849 /* Set if insn is a doubleword swap. This can either be a register swap
41850 or a permuting load or store (test is_load and is_store for this). */
41851 unsigned int is_swap : 1;
41852 /* Set if the insn has a live-in use of a parameter register. */
41853 unsigned int is_live_in : 1;
41854 /* Set if the insn has a live-out def of a return register. */
41855 unsigned int is_live_out : 1;
41856 /* Set if the insn contains a subreg reference of a vector register. */
41857 unsigned int contains_subreg : 1;
41858 /* Set if the insn contains a 128-bit integer operand. */
41859 unsigned int is_128_int : 1;
41860 /* Set if this is a call-insn. */
41861 unsigned int is_call : 1;
41862 /* Set if this insn does not perform a vector operation for which
41863 element order matters, or if we know how to fix it up if it does.
41864 Undefined if is_swap is set. */
41865 unsigned int is_swappable : 1;
41866 /* A nonzero value indicates what kind of special handling for this
41867 insn is required if doublewords are swapped. Undefined if
41868 is_swappable is not set. */
41869 unsigned int special_handling : 4;
41870 /* Set if the web represented by this entry cannot be optimized. */
41871 unsigned int web_not_optimizable : 1;
41872 /* Set if this insn should be deleted. */
41873 unsigned int will_delete : 1;
41876 enum special_handling_values {
41877 SH_NONE = 0,
41878 SH_CONST_VECTOR,
41879 SH_SUBREG,
41880 SH_NOSWAP_LD,
41881 SH_NOSWAP_ST,
41882 SH_EXTRACT,
41883 SH_SPLAT,
41884 SH_XXPERMDI,
41885 SH_CONCAT,
41886 SH_VPERM
41889 /* Union INSN with all insns containing definitions that reach USE.
41890 Detect whether USE is live-in to the current function. */
41891 static void
41892 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41894 struct df_link *link = DF_REF_CHAIN (use);
41896 if (!link)
41897 insn_entry[INSN_UID (insn)].is_live_in = 1;
41899 while (link)
41901 if (DF_REF_IS_ARTIFICIAL (link->ref))
41902 insn_entry[INSN_UID (insn)].is_live_in = 1;
41904 if (DF_REF_INSN_INFO (link->ref))
41906 rtx def_insn = DF_REF_INSN (link->ref);
41907 (void)unionfind_union (insn_entry + INSN_UID (insn),
41908 insn_entry + INSN_UID (def_insn));
41911 link = link->next;
41915 /* Union INSN with all insns containing uses reached from DEF.
41916 Detect whether DEF is live-out from the current function. */
41917 static void
41918 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41920 struct df_link *link = DF_REF_CHAIN (def);
41922 if (!link)
41923 insn_entry[INSN_UID (insn)].is_live_out = 1;
41925 while (link)
41927 /* This could be an eh use or some other artificial use;
41928 we treat these all the same (killing the optimization). */
41929 if (DF_REF_IS_ARTIFICIAL (link->ref))
41930 insn_entry[INSN_UID (insn)].is_live_out = 1;
41932 if (DF_REF_INSN_INFO (link->ref))
41934 rtx use_insn = DF_REF_INSN (link->ref);
41935 (void)unionfind_union (insn_entry + INSN_UID (insn),
41936 insn_entry + INSN_UID (use_insn));
41939 link = link->next;
41943 /* Return 1 iff INSN is a load insn, including permuting loads that
41944 represent an lxvd2x instruction; else return 0. */
41945 static unsigned int
41946 insn_is_load_p (rtx insn)
41948 rtx body = PATTERN (insn);
41950 if (GET_CODE (body) == SET)
41952 if (GET_CODE (SET_SRC (body)) == MEM)
41953 return 1;
41955 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41956 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41957 return 1;
41959 return 0;
41962 if (GET_CODE (body) != PARALLEL)
41963 return 0;
41965 rtx set = XVECEXP (body, 0, 0);
41967 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41968 return 1;
41970 return 0;
41973 /* Return 1 iff INSN is a store insn, including permuting stores that
41974 represent an stxvd2x instruction; else return 0. */
41975 static unsigned int
41976 insn_is_store_p (rtx insn)
41978 rtx body = PATTERN (insn);
41979 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41980 return 1;
41981 if (GET_CODE (body) != PARALLEL)
41982 return 0;
41983 rtx set = XVECEXP (body, 0, 0);
41984 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41985 return 1;
41986 return 0;
41989 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41990 a permuting load, or a permuting store. */
41991 static unsigned int
41992 insn_is_swap_p (rtx insn)
41994 rtx body = PATTERN (insn);
41995 if (GET_CODE (body) != SET)
41996 return 0;
41997 rtx rhs = SET_SRC (body);
41998 if (GET_CODE (rhs) != VEC_SELECT)
41999 return 0;
42000 rtx parallel = XEXP (rhs, 1);
42001 if (GET_CODE (parallel) != PARALLEL)
42002 return 0;
42003 unsigned int len = XVECLEN (parallel, 0);
42004 if (len != 2 && len != 4 && len != 8 && len != 16)
42005 return 0;
42006 for (unsigned int i = 0; i < len / 2; ++i)
42008 rtx op = XVECEXP (parallel, 0, i);
42009 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
42010 return 0;
42012 for (unsigned int i = len / 2; i < len; ++i)
42014 rtx op = XVECEXP (parallel, 0, i);
42015 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
42016 return 0;
42018 return 1;
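/* Example of a pattern the test above accepts: for V4SI, a doubleword swap
   is (vec_select:V4SI (reg) (parallel [2 3 0 1])) -- the first half of the
   selectors counts len/2..len-1 and the second half counts 0..len/2-1. */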
42021 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
42022 static bool
42023 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
42025 unsigned uid = INSN_UID (insn);
42026 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
42027 return false;
42029 /* Find the unique use in the swap and locate its def. If the def
42030 isn't unique, punt. */
42031 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42032 df_ref use;
42033 FOR_EACH_INSN_INFO_USE (use, insn_info)
42035 struct df_link *def_link = DF_REF_CHAIN (use);
42036 if (!def_link || def_link->next)
42037 return false;
42039 rtx def_insn = DF_REF_INSN (def_link->ref);
42040 unsigned uid2 = INSN_UID (def_insn);
42041 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
42042 return false;
42044 rtx body = PATTERN (def_insn);
42045 if (GET_CODE (body) != SET
42046 || GET_CODE (SET_SRC (body)) != VEC_SELECT
42047 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
42048 return false;
42050 rtx mem = XEXP (SET_SRC (body), 0);
42051 rtx base_reg = XEXP (mem, 0);
42053 df_ref base_use;
42054 insn_info = DF_INSN_INFO_GET (def_insn);
42055 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
42057 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
42058 continue;
42060 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
42061 if (!base_def_link || base_def_link->next)
42062 return false;
42064 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
42065 rtx tocrel_body = PATTERN (tocrel_insn);
42066 rtx base, offset;
42067 if (GET_CODE (tocrel_body) != SET)
42068 return false;
42069 /* There is an extra level of indirection for small/large
42070 code models. */
42071 rtx tocrel_expr = SET_SRC (tocrel_body);
42072 if (GET_CODE (tocrel_expr) == MEM)
42073 tocrel_expr = XEXP (tocrel_expr, 0);
42074 if (!toc_relative_expr_p (tocrel_expr, false))
42075 return false;
42076 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42077 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
42078 return false;
42081 return true;
42084 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
42085 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
42086 static bool
42087 v2df_reduction_p (rtx op)
42089 if (GET_MODE (op) != V2DFmode)
42090 return false;
42092 enum rtx_code code = GET_CODE (op);
42093 if (code != PLUS && code != SMIN && code != SMAX)
42094 return false;
42096 rtx concat = XEXP (op, 0);
42097 if (GET_CODE (concat) != VEC_CONCAT)
42098 return false;
42100 rtx select0 = XEXP (concat, 0);
42101 rtx select1 = XEXP (concat, 1);
42102 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
42103 return false;
42105 rtx reg0 = XEXP (select0, 0);
42106 rtx reg1 = XEXP (select1, 0);
42107 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
42108 return false;
42110 rtx parallel0 = XEXP (select0, 1);
42111 rtx parallel1 = XEXP (select1, 1);
42112 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
42113 return false;
42115 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
42116 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
42117 return false;
42119 return true;
42122 /* Return 1 iff OP is an operand that will not be affected by having
42123 vector doublewords swapped in memory. */
42124 static unsigned int
42125 rtx_is_swappable_p (rtx op, unsigned int *special)
42127 enum rtx_code code = GET_CODE (op);
42128 int i, j;
42129 rtx parallel;
42131 switch (code)
42133 case LABEL_REF:
42134 case SYMBOL_REF:
42135 case CLOBBER:
42136 case REG:
42137 return 1;
42139 case VEC_CONCAT:
42140 case ASM_INPUT:
42141 case ASM_OPERANDS:
42142 return 0;
42144 case CONST_VECTOR:
42146 *special = SH_CONST_VECTOR;
42147 return 1;
42150 case VEC_DUPLICATE:
42151 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42152 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42153 it represents a vector splat for which we can do special
42154 handling. */
42155 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
42156 return 1;
42157 else if (REG_P (XEXP (op, 0))
42158 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42159 /* This catches V2DF and V2DI splat, at a minimum. */
42160 return 1;
42161 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
42162 && REG_P (XEXP (XEXP (op, 0), 0))
42163 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42164 /* This catches splat of a truncated value. */
42165 return 1;
42166 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
42167 /* If the duplicated item is from a select, defer to the select
42168 processing to see if we can change the lane for the splat. */
42169 return rtx_is_swappable_p (XEXP (op, 0), special);
42170 else
42171 return 0;
42173 case VEC_SELECT:
42174 /* A vec_extract operation is ok if we change the lane. */
42175 if (GET_CODE (XEXP (op, 0)) == REG
42176 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
42177 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42178 && XVECLEN (parallel, 0) == 1
42179 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
42181 *special = SH_EXTRACT;
42182 return 1;
42184 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42185 XXPERMDI is a swap operation, it will be identified by
42186 insn_is_swap_p and therefore we won't get here. */
42187 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
42188 && (GET_MODE (XEXP (op, 0)) == V4DFmode
42189 || GET_MODE (XEXP (op, 0)) == V4DImode)
42190 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42191 && XVECLEN (parallel, 0) == 2
42192 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
42193 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
42195 *special = SH_XXPERMDI;
42196 return 1;
42198 else if (v2df_reduction_p (op))
42199 return 1;
42200 else
42201 return 0;
42203 case UNSPEC:
42205 /* Various operations are unsafe for this optimization, at least
42206 without significant additional work. Permutes are obviously
42207 problematic, as both the permute control vector and the ordering
42208 of the target values are invalidated by doubleword swapping.
42209 Vector pack and unpack modify the number of vector lanes.
42210 Merge-high/low will not operate correctly on swapped operands.
42211 Vector shifts across element boundaries are clearly uncool,
42212 as are vector select and concatenate operations. Vector
42213 sum-across instructions define one operand with a specific
42214 order-dependent element, so additional fixup code would be
42215 needed to make those work. Vector set and non-immediate-form
42216 vector splat are element-order sensitive. A few of these
42217 cases might be workable with special handling if required.
42218 Adding cost modeling would be appropriate in some cases. */
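/* For example, a vperm control byte of 0 selects byte 0 of the
   32-byte concatenation of the two inputs; once each input has
   its doublewords swapped, that data resides at offset 8, so an
   unadjusted control vector would select the wrong bytes.  */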
42219 int val = XINT (op, 1);
42220 switch (val)
42222 default:
42223 break;
42224 case UNSPEC_VMRGH_DIRECT:
42225 case UNSPEC_VMRGL_DIRECT:
42226 case UNSPEC_VPACK_SIGN_SIGN_SAT:
42227 case UNSPEC_VPACK_SIGN_UNS_SAT:
42228 case UNSPEC_VPACK_UNS_UNS_MOD:
42229 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
42230 case UNSPEC_VPACK_UNS_UNS_SAT:
42231 case UNSPEC_VPERM:
42232 case UNSPEC_VPERM_UNS:
42233 case UNSPEC_VPERMHI:
42234 case UNSPEC_VPERMSI:
42235 case UNSPEC_VPKPX:
42236 case UNSPEC_VSLDOI:
42237 case UNSPEC_VSLO:
42238 case UNSPEC_VSRO:
42239 case UNSPEC_VSUM2SWS:
42240 case UNSPEC_VSUM4S:
42241 case UNSPEC_VSUM4UBS:
42242 case UNSPEC_VSUMSWS:
42243 case UNSPEC_VSUMSWS_DIRECT:
42244 case UNSPEC_VSX_CONCAT:
42245 case UNSPEC_VSX_SET:
42246 case UNSPEC_VSX_SLDWI:
42247 case UNSPEC_VUNPACK_HI_SIGN:
42248 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
42249 case UNSPEC_VUNPACK_LO_SIGN:
42250 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
42251 case UNSPEC_VUPKHPX:
42252 case UNSPEC_VUPKHS_V4SF:
42253 case UNSPEC_VUPKHU_V4SF:
42254 case UNSPEC_VUPKLPX:
42255 case UNSPEC_VUPKLS_V4SF:
42256 case UNSPEC_VUPKLU_V4SF:
42257 case UNSPEC_VSX_CVDPSPN:
42258 case UNSPEC_VSX_CVSPDP:
42259 case UNSPEC_VSX_CVSPDPN:
42260 case UNSPEC_VSX_EXTRACT:
42261 case UNSPEC_VSX_VSLO:
42262 case UNSPEC_VSX_VEC_INIT:
42263 return 0;
42264 case UNSPEC_VSPLT_DIRECT:
42265 case UNSPEC_VSX_XXSPLTD:
42266 *special = SH_SPLAT;
42267 return 1;
42268 case UNSPEC_REDUC_PLUS:
42269 case UNSPEC_REDUC:
42270 return 1;
42274 default:
42275 break;
42278 const char *fmt = GET_RTX_FORMAT (code);
42279 int ok = 1;
42281 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42282 if (fmt[i] == 'e' || fmt[i] == 'u')
42284 unsigned int special_op = SH_NONE;
42285 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
42286 if (special_op == SH_NONE)
42287 continue;
42288 /* Ensure we never have two kinds of special handling
42289 for the same insn. */
42290 if (*special != SH_NONE && *special != special_op)
42291 return 0;
42292 *special = special_op;
42294 else if (fmt[i] == 'E')
42295 for (j = 0; j < XVECLEN (op, i); ++j)
42297 unsigned int special_op = SH_NONE;
42298 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
42299 if (special_op == SH_NONE)
42300 continue;
42301 /* Ensure we never have two kinds of special handling
42302 for the same insn. */
42303 if (*special != SH_NONE && *special != special_op)
42304 return 0;
42305 *special = special_op;
42308 return ok;
42311 /* Return 1 iff INSN is an insn that will not be affected by
42312 having vector doublewords swapped in memory (in which case
42313 *SPECIAL is unchanged), or that can be modified to be correct
42314 if vector doublewords are swapped in memory (in which case
42315 *SPECIAL is changed to a value indicating how). */
42316 static unsigned int
42317 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
42318 unsigned int *special)
42320 /* Calls are always bad. */
42321 if (GET_CODE (insn) == CALL_INSN)
42322 return 0;
42324 /* Loads and stores seen here are not permuting, but we can still
42325 fix them up by converting them to permuting ones. Exceptions:
42326 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42327 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42328 for the SET source. Also we must now make an exception for lvx
42329 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42330 explicit "& -16") since this leads to unrecognizable insns. */
42331 rtx body = PATTERN (insn);
42332 int i = INSN_UID (insn);
42334 if (insn_entry[i].is_load)
42336 if (GET_CODE (body) == SET)
42338 rtx rhs = SET_SRC (body);
42339 /* Even without a swap, the RHS might be a vec_select for, say,
42340 a byte-reversing load. */
42341 if (GET_CODE (rhs) != MEM)
42342 return 0;
42343 if (GET_CODE (XEXP (rhs, 0)) == AND)
42344 return 0;
42346 *special = SH_NOSWAP_LD;
42347 return 1;
42349 else
42350 return 0;
42353 if (insn_entry[i].is_store)
42355 if (GET_CODE (body) == SET
42356 && GET_CODE (SET_SRC (body)) != UNSPEC)
42358 rtx lhs = SET_DEST (body);
42359 /* Even without a swap, the LHS might be a vec_select for, say,
42360 a byte-reversing store. */
42361 if (GET_CODE (lhs) != MEM)
42362 return 0;
42363 if (GET_CODE (XEXP (lhs, 0)) == AND)
42364 return 0;
42366 *special = SH_NOSWAP_ST;
42367 return 1;
42369 else
42370 return 0;
42373 /* A convert to single precision can be left as is provided that
42374 all of its uses are in xxspltw instructions that splat BE element
42375 zero. */
42376 if (GET_CODE (body) == SET
42377 && GET_CODE (SET_SRC (body)) == UNSPEC
42378 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
42380 df_ref def;
42381 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42383 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42385 struct df_link *link = DF_REF_CHAIN (def);
42386 if (!link)
42387 return 0;
42389 for (; link; link = link->next) {
42390 rtx use_insn = DF_REF_INSN (link->ref);
42391 rtx use_body = PATTERN (use_insn);
42392 if (GET_CODE (use_body) != SET
42393 || GET_CODE (SET_SRC (use_body)) != UNSPEC
42394 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
42395 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
42396 return 0;
42400 return 1;
42403 /* A concatenation of two doublewords is ok if we reverse the
42404 order of the inputs. */
42405 if (GET_CODE (body) == SET
42406 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
42407 && (GET_MODE (SET_SRC (body)) == V2DFmode
42408 || GET_MODE (SET_SRC (body)) == V2DImode))
42410 *special = SH_CONCAT;
42411 return 1;
42414 /* V2DF reductions are always swappable. */
42415 if (GET_CODE (body) == PARALLEL)
42417 rtx expr = XVECEXP (body, 0, 0);
42418 if (GET_CODE (expr) == SET
42419 && v2df_reduction_p (SET_SRC (expr)))
42420 return 1;
42423 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42424 constant pool. */
42425 if (GET_CODE (body) == SET
42426 && GET_CODE (SET_SRC (body)) == UNSPEC
42427 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
42428 && XVECLEN (SET_SRC (body), 0) == 3
42429 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
42431 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
42432 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42433 df_ref use;
42434 FOR_EACH_INSN_INFO_USE (use, insn_info)
42435 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42437 struct df_link *def_link = DF_REF_CHAIN (use);
42438 /* Punt if multiple definitions for this reg. */
42439 if (def_link && !def_link->next
42440 && const_load_sequence_p (insn_entry,
42441 DF_REF_INSN (def_link->ref)))
42443 *special = SH_VPERM;
42444 return 1;
42449 /* Otherwise check the operands for vector lane violations. */
42450 return rtx_is_swappable_p (body, special);
42453 enum chain_purpose { FOR_LOADS, FOR_STORES };
42455 /* Return true if the UD or DU chain headed by LINK is non-empty,
42456 and every entry on the chain references an insn that is a
42457 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42458 register swap must have only permuting loads as reaching defs.
42459 If PURPOSE is FOR_STORES, each such register swap must have only
42460 register swaps or permuting stores as reached uses. */
42461 static bool
42462 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
42463 enum chain_purpose purpose)
42465 if (!link)
42466 return false;
42468 for (; link; link = link->next)
42470 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
42471 continue;
42473 if (DF_REF_IS_ARTIFICIAL (link->ref))
42474 return false;
42476 rtx reached_insn = DF_REF_INSN (link->ref);
42477 unsigned uid = INSN_UID (reached_insn);
42478 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
42480 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
42481 || insn_entry[uid].is_store)
42482 return false;
42484 if (purpose == FOR_LOADS)
42486 df_ref use;
42487 FOR_EACH_INSN_INFO_USE (use, insn_info)
42489 struct df_link *swap_link = DF_REF_CHAIN (use);
42491 while (swap_link)
42493 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42494 return false;
42496 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
42497 unsigned uid2 = INSN_UID (swap_def_insn);
42499 /* Only permuting loads are allowed. */
42500 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
42501 return false;
42503 swap_link = swap_link->next;
42507 else if (purpose == FOR_STORES)
42509 df_ref def;
42510 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42512 struct df_link *swap_link = DF_REF_CHAIN (def);
42514 while (swap_link)
42516 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42517 return false;
42519 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
42520 unsigned uid2 = INSN_UID (swap_use_insn);
42522 /* Permuting stores or register swaps are allowed. */
42523 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
42524 return false;
42526 swap_link = swap_link->next;
42532 return true;
42535 /* Mark the xxswapdi instructions associated with permuting loads and
42536 stores for removal. Note that we only flag them for deletion here,
42537 as there is a possibility of a swap being reached from multiple
42538 loads, etc. */
42539 static void
42540 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
42542 rtx insn = insn_entry[i].insn;
42543 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42545 if (insn_entry[i].is_load)
42547 df_ref def;
42548 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42550 struct df_link *link = DF_REF_CHAIN (def);
42552 /* We know by now that these are swaps, so we can delete
42553 them confidently. */
42554 while (link)
42556 rtx use_insn = DF_REF_INSN (link->ref);
42557 insn_entry[INSN_UID (use_insn)].will_delete = 1;
42558 link = link->next;
42562 else if (insn_entry[i].is_store)
42564 df_ref use;
42565 FOR_EACH_INSN_INFO_USE (use, insn_info)
42567 /* Ignore uses for addressability. */
42568 machine_mode mode = GET_MODE (DF_REF_REG (use));
42569 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42570 continue;
42572 struct df_link *link = DF_REF_CHAIN (use);
42574 /* We know by now that these are swaps, so we can delete
42575 them confidently. */
42576 while (link)
42578 rtx def_insn = DF_REF_INSN (link->ref);
42579 insn_entry[INSN_UID (def_insn)].will_delete = 1;
42580 link = link->next;
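/* Illustration: for a V4SI constant {1,2,3,4}, swapping the
   doubleword halves yields {3,4,1,2}, which is the layout the
   value will have in a register once the swaps are removed.  */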
42586 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
42587 Swap the first half of the vector with the second in the first
42588 case. Recurse to find it in the second. */
42589 static void
42590 swap_const_vector_halves (rtx *op_ptr)
42592 int i;
42593 rtx op = *op_ptr;
42594 enum rtx_code code = GET_CODE (op);
42595 if (GET_CODE (op) == CONST_VECTOR)
42597 int units = GET_MODE_NUNITS (GET_MODE (op));
42598 rtx_vector_builder builder (GET_MODE (op), units, 1);
42599 for (i = 0; i < units / 2; ++i)
42600 builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
42601 for (i = 0; i < units / 2; ++i)
42602 builder.quick_push (CONST_VECTOR_ELT (op, i));
42603 *op_ptr = builder.build ();
42605 else
42607 int j;
42608 const char *fmt = GET_RTX_FORMAT (code);
42609 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42610 if (fmt[i] == 'e' || fmt[i] == 'u')
42611 swap_const_vector_halves (&XEXP (op, i));
42612 else if (fmt[i] == 'E')
42613 for (j = 0; j < XVECLEN (op, i); ++j)
42614 swap_const_vector_halves (&XVECEXP (op, i, j));
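/* Illustration: a narrowing (subreg:DI (reg:V2DI) 0) names
   doubleword 0; with the doublewords swapped in the register it
   must become (subreg:DI (reg:V2DI) 8), and vice versa.  */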
42618 /* Find all subregs of a vector expression that perform a narrowing,
42619 and adjust the subreg index to account for doubleword swapping. */
42620 static void
42621 adjust_subreg_index (rtx op)
42623 enum rtx_code code = GET_CODE (op);
42624 if (code == SUBREG
42625 && (GET_MODE_SIZE (GET_MODE (op))
42626 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42628 unsigned int index = SUBREG_BYTE (op);
42629 if (index < 8)
42630 index += 8;
42631 else
42632 index -= 8;
42633 SUBREG_BYTE (op) = index;
42636 const char *fmt = GET_RTX_FORMAT (code);
42637 int i, j;
42638 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42639 if (fmt[i] == 'e' || fmt[i] == 'u')
42640 adjust_subreg_index (XEXP (op, i));
42641 else if (fmt[i] == 'E')
42642 for (j = 0; j < XVECLEN (op, i); ++j)
42643 adjust_subreg_index (XVECEXP (op, i, j));
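/* Illustration: for a V4SI load the selection vector built below
   is (parallel [2 3 0 1]), giving

     (set (reg:V4SI y)
          (vec_select:V4SI (mem:V4SI addr) (parallel [2 3 0 1])))

   which is the doubleword-permuting form of the load.  */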
42646 /* Convert the non-permuting load INSN to a permuting one. */
42647 static void
42648 permute_load (rtx_insn *insn)
42650 rtx body = PATTERN (insn);
42651 rtx mem_op = SET_SRC (body);
42652 rtx tgt_reg = SET_DEST (body);
42653 machine_mode mode = GET_MODE (tgt_reg);
42654 int n_elts = GET_MODE_NUNITS (mode);
42655 int half_elts = n_elts / 2;
42656 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42657 int i, j;
42658 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42659 XVECEXP (par, 0, i) = GEN_INT (j);
42660 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42661 XVECEXP (par, 0, i) = GEN_INT (j);
42662 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42663 SET_SRC (body) = sel;
42664 INSN_CODE (insn) = -1; /* Force re-recognition. */
42665 df_insn_rescan (insn);
42667 if (dump_file)
42668 fprintf (dump_file, "Replacing load %d with permuted load\n",
42669 INSN_UID (insn));
42672 /* Convert the non-permuting store INSN to a permuting one. */
42673 static void
42674 permute_store (rtx_insn *insn)
42676 rtx body = PATTERN (insn);
42677 rtx src_reg = SET_SRC (body);
42678 machine_mode mode = GET_MODE (src_reg);
42679 int n_elts = GET_MODE_NUNITS (mode);
42680 int half_elts = n_elts / 2;
42681 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42682 int i, j;
42683 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42684 XVECEXP (par, 0, i) = GEN_INT (j);
42685 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42686 XVECEXP (par, 0, i) = GEN_INT (j);
42687 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42688 SET_SRC (body) = sel;
42689 INSN_CODE (insn) = -1; /* Force re-recognition. */
42690 df_insn_rescan (insn);
42692 if (dump_file)
42693 fprintf (dump_file, "Replacing store %d with permuted store\n",
42694 INSN_UID (insn));
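/* Illustration: extracting lane 1 of a V4SI value (half_elts == 2)
   must become an extract of lane 3 once the doublewords are
   swapped, and lane 3 must become lane 1; every lane moves to the
   other half of the register.  */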
42697 /* Given INSN that contains a vector extract operation, adjust the index
42698 of the extracted lane to account for the doubleword swap. */
42699 static void
42700 adjust_extract (rtx_insn *insn)
42702 rtx pattern = PATTERN (insn);
42703 if (GET_CODE (pattern) == PARALLEL)
42704 pattern = XVECEXP (pattern, 0, 0);
42705 rtx src = SET_SRC (pattern);
42706 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42707 account for that. */
42708 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42709 rtx par = XEXP (sel, 1);
42710 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42711 int lane = INTVAL (XVECEXP (par, 0, 0));
42712 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42713 XVECEXP (par, 0, 0) = GEN_INT (lane);
42714 INSN_CODE (insn) = -1; /* Force re-recognition. */
42715 df_insn_rescan (insn);
42717 if (dump_file)
42718 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
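/* The same lane correction applies to direct splats: e.g. an
   xxspltw replicating word 0 of a V4SI source must replicate
   word 2 after the doubleword swap.  */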
42721 /* Given INSN that contains a vector direct-splat operation, adjust the index
42722 of the source lane to account for the doubleword swap. */
42723 static void
42724 adjust_splat (rtx_insn *insn)
42726 rtx body = PATTERN (insn);
42727 rtx unspec = XEXP (body, 1);
42728 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42729 int lane = INTVAL (XVECEXP (unspec, 0, 1));
42730 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42731 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42732 INSN_CODE (insn) = -1; /* Force re-recognition. */
42733 df_insn_rescan (insn);
42735 if (dump_file)
42736 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
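/* Worked example: (vec_select (vec_concat A B) [0 2]), which picks
   doubleword 0 of A and doubleword 0 of B, becomes
   (vec_select (vec_concat B A) [1 3]); each new lane is 3 minus an
   old lane, with the two lanes also exchanging positions, as
   computed below.  */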
42739 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
42740 swap), reverse the order of the source operands and adjust the indices
42741 of the source lanes to account for doubleword reversal. */
42742 static void
42743 adjust_xxpermdi (rtx_insn *insn)
42745 rtx set = PATTERN (insn);
42746 rtx select = XEXP (set, 1);
42747 rtx concat = XEXP (select, 0);
42748 rtx src0 = XEXP (concat, 0);
42749 XEXP (concat, 0) = XEXP (concat, 1);
42750 XEXP (concat, 1) = src0;
42751 rtx parallel = XEXP (select, 1);
42752 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42753 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42754 int new_lane0 = 3 - lane1;
42755 int new_lane1 = 3 - lane0;
42756 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42757 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42758 INSN_CODE (insn) = -1; /* Force re-recognition. */
42759 df_insn_rescan (insn);
42761 if (dump_file)
42762 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
42765 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
42766 reverse the order of those inputs. */
42767 static void
42768 adjust_concat (rtx_insn *insn)
42770 rtx set = PATTERN (insn);
42771 rtx concat = XEXP (set, 1);
42772 rtx src0 = XEXP (concat, 0);
42773 XEXP (concat, 0) = XEXP (concat, 1);
42774 XEXP (concat, 1) = src0;
42775 INSN_CODE (insn) = -1; /* Force re-recognition. */
42776 df_insn_rescan (insn);
42778 if (dump_file)
42779 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
42782 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42783 constant pool to reflect swapped doublewords. */
42784 static void
42785 adjust_vperm (rtx_insn *insn)
42787 /* We previously determined that the UNSPEC_VPERM was fed by a
42788 swap of a swapping load of a TOC-relative constant pool symbol.
42789 Find the MEM in the swapping load and replace it with a MEM for
42790 the adjusted mask constant. */
42791 rtx set = PATTERN (insn);
42792 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
42794 /* Find the swap. */
42795 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42796 df_ref use;
42797 rtx_insn *swap_insn = 0;
42798 FOR_EACH_INSN_INFO_USE (use, insn_info)
42799 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42801 struct df_link *def_link = DF_REF_CHAIN (use);
42802 gcc_assert (def_link && !def_link->next);
42803 swap_insn = DF_REF_INSN (def_link->ref);
42804 break;
42806 gcc_assert (swap_insn);
42808 /* Find the load. */
42809 insn_info = DF_INSN_INFO_GET (swap_insn);
42810 rtx_insn *load_insn = 0;
42811 FOR_EACH_INSN_INFO_USE (use, insn_info)
42813 struct df_link *def_link = DF_REF_CHAIN (use);
42814 gcc_assert (def_link && !def_link->next);
42815 load_insn = DF_REF_INSN (def_link->ref);
42816 break;
42818 gcc_assert (load_insn);
42820 /* Find the TOC-relative symbol access. */
42821 insn_info = DF_INSN_INFO_GET (load_insn);
42822 rtx_insn *tocrel_insn = 0;
42823 FOR_EACH_INSN_INFO_USE (use, insn_info)
42825 struct df_link *def_link = DF_REF_CHAIN (use);
42826 gcc_assert (def_link && !def_link->next);
42827 tocrel_insn = DF_REF_INSN (def_link->ref);
42828 break;
42830 gcc_assert (tocrel_insn);
42832 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42833 to set tocrel_base; otherwise it would be unnecessary as we've
42834 already established it will return true. */
42835 rtx base, offset;
42836 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
42837 /* There is an extra level of indirection for small/large code models. */
42838 if (GET_CODE (tocrel_expr) == MEM)
42839 tocrel_expr = XEXP (tocrel_expr, 0);
42840 if (!toc_relative_expr_p (tocrel_expr, false))
42841 gcc_unreachable ();
42842 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42843 rtx const_vector = get_pool_constant (base);
42844 /* With the extra indirection, get_pool_constant will produce the
42845 real constant from the reg_equal expression, so get the real
42846 constant. */
42847 if (GET_CODE (const_vector) == SYMBOL_REF)
42848 const_vector = get_pool_constant (const_vector);
42849 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
42851 /* Create an adjusted mask from the initial mask. */
42852 unsigned int new_mask[16], i, val;
42853 for (i = 0; i < 16; ++i) {
42854 val = INTVAL (XVECEXP (const_vector, 0, i));
42855 if (val < 16)
42856 new_mask[i] = (val + 8) % 16;
42857 else
42858 new_mask[i] = ((val + 8) % 16) + 16;
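/* E.g. control value 3 becomes 11 and 11 becomes 3; likewise, for
   bytes of the second input, 19 becomes 27 and 27 becomes 19.
   Each selected byte simply moves to the other doubleword of the
   same input.  */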
42861 /* Create a new CONST_VECTOR and a MEM that references it. */
42862 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
42863 for (i = 0; i < 16; ++i)
42864 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
42865 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
42866 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
42867 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42868 can't recognize. Force the SYMBOL_REF into a register. */
42869 if (!REG_P (XEXP (new_mem, 0))) {
42870 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
42871 XEXP (new_mem, 0) = base_reg;
42872 /* Move the newly created insn ahead of the load insn. */
42873 rtx_insn *force_insn = get_last_insn ();
42874 remove_insn (force_insn);
42875 rtx_insn *before_load_insn = PREV_INSN (load_insn);
42876 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
42877 df_insn_rescan (before_load_insn);
42878 df_insn_rescan (force_insn);
42881 /* Replace the MEM in the load instruction and rescan it. */
42882 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
42883 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
42884 df_insn_rescan (load_insn);
42886 if (dump_file)
42887 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
42890 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42891 with special handling. Take care of that here. */
42892 static void
42893 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
42895 rtx_insn *insn = insn_entry[i].insn;
42896 rtx body = PATTERN (insn);
42898 switch (insn_entry[i].special_handling)
42900 default:
42901 gcc_unreachable ();
42902 case SH_CONST_VECTOR:
42904 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42905 gcc_assert (GET_CODE (body) == SET);
42906 swap_const_vector_halves (&SET_SRC (body));
42907 if (dump_file)
42908 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
42909 break;
42911 case SH_SUBREG:
42912 /* A subreg of the same size is already safe. For subregs that
42913 select a smaller portion of a reg, adjust the index for
42914 swapped doublewords. */
42915 adjust_subreg_index (body);
42916 if (dump_file)
42917 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
42918 break;
42919 case SH_NOSWAP_LD:
42920 /* Convert a non-permuting load to a permuting one. */
42921 permute_load (insn);
42922 break;
42923 case SH_NOSWAP_ST:
42924 /* Convert a non-permuting store to a permuting one. */
42925 permute_store (insn);
42926 break;
42927 case SH_EXTRACT:
42928 /* Change the lane on an extract operation. */
42929 adjust_extract (insn);
42930 break;
42931 case SH_SPLAT:
42932 /* Change the lane on a direct-splat operation. */
42933 adjust_splat (insn);
42934 break;
42935 case SH_XXPERMDI:
42936 /* Change the lanes on an XXPERMDI operation. */
42937 adjust_xxpermdi (insn);
42938 break;
42939 case SH_CONCAT:
42940 /* Reverse the order of a concatenation operation. */
42941 adjust_concat (insn);
42942 break;
42943 case SH_VPERM:
42944 /* Change the mask loaded from the constant pool for a VPERM. */
42945 adjust_vperm (insn);
42946 break;
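/* A register swap has the shape

     (set (reg:V4SI y)
          (vec_select:V4SI (reg:V4SI x) (parallel [2 3 0 1])))

   (with the analogous PARALLEL for other modes), so taking XEXP of
   the SET_SRC below recovers X for the replacement copy.  */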
42950 /* Find the insn from the Ith table entry, which is known to be a
42951 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42952 static void
42953 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42955 rtx_insn *insn = insn_entry[i].insn;
42956 rtx body = PATTERN (insn);
42957 rtx src_reg = XEXP (SET_SRC (body), 0);
42958 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42959 rtx_insn *new_insn = emit_insn_before (copy, insn);
42960 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42961 df_insn_rescan (new_insn);
42963 if (dump_file)
42965 unsigned int new_uid = INSN_UID (new_insn);
42966 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42969 df_insn_delete (insn);
42970 remove_insn (insn);
42971 insn->set_deleted ();
42974 /* Dump the swap table to DUMP_FILE. */
42975 static void
42976 dump_swap_insn_table (swap_web_entry *insn_entry)
42978 int e = get_max_uid ();
42979 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42981 for (int i = 0; i < e; ++i)
42982 if (insn_entry[i].is_relevant)
42984 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42985 fprintf (dump_file, "%6d %6d ", i,
42986 pred_entry && pred_entry->insn
42987 ? INSN_UID (pred_entry->insn) : 0);
42988 if (insn_entry[i].is_load)
42989 fputs ("load ", dump_file);
42990 if (insn_entry[i].is_store)
42991 fputs ("store ", dump_file);
42992 if (insn_entry[i].is_swap)
42993 fputs ("swap ", dump_file);
42994 if (insn_entry[i].is_live_in)
42995 fputs ("live-in ", dump_file);
42996 if (insn_entry[i].is_live_out)
42997 fputs ("live-out ", dump_file);
42998 if (insn_entry[i].contains_subreg)
42999 fputs ("subreg ", dump_file);
43000 if (insn_entry[i].is_128_int)
43001 fputs ("int128 ", dump_file);
43002 if (insn_entry[i].is_call)
43003 fputs ("call ", dump_file);
43004 if (insn_entry[i].is_swappable)
43006 fputs ("swappable ", dump_file);
43007 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
43008 fputs ("special:constvec ", dump_file);
43009 else if (insn_entry[i].special_handling == SH_SUBREG)
43010 fputs ("special:subreg ", dump_file);
43011 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
43012 fputs ("special:load ", dump_file);
43013 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
43014 fputs ("special:store ", dump_file);
43015 else if (insn_entry[i].special_handling == SH_EXTRACT)
43016 fputs ("special:extract ", dump_file);
43017 else if (insn_entry[i].special_handling == SH_SPLAT)
43018 fputs ("special:splat ", dump_file);
43019 else if (insn_entry[i].special_handling == SH_XXPERMDI)
43020 fputs ("special:xxpermdi ", dump_file);
43021 else if (insn_entry[i].special_handling == SH_CONCAT)
43022 fputs ("special:concat ", dump_file);
43023 else if (insn_entry[i].special_handling == SH_VPERM)
43024 fputs ("special:vperm ", dump_file);
43026 if (insn_entry[i].web_not_optimizable)
43027 fputs ("unoptimizable ", dump_file);
43028 if (insn_entry[i].will_delete)
43029 fputs ("delete ", dump_file);
43030 fputs ("\n", dump_file);
43032 fputs ("\n", dump_file);
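/* The two canonical results are

     (and (reg) (const_int -16))
     (and (plus (reg) (reg)) (const_int -16))

   with any other address operand forced into a register first.  */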
43035 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
43036 Here RTX is an (& addr (const_int -16)). Always return a new copy
43037 to avoid problems with combine. */
43038 static rtx
43039 alignment_with_canonical_addr (rtx align)
43041 rtx canon;
43042 rtx addr = XEXP (align, 0);
43044 if (REG_P (addr))
43045 canon = addr;
43047 else if (GET_CODE (addr) == PLUS)
43049 rtx addrop0 = XEXP (addr, 0);
43050 rtx addrop1 = XEXP (addr, 1);
43052 if (!REG_P (addrop0))
43053 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
43055 if (!REG_P (addrop1))
43056 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
43058 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
43061 else
43062 canon = force_reg (GET_MODE (addr), addr);
43064 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
43067 /* Check whether an rtx is an alignment mask, and if so, return
43068 a fully-expanded rtx for the masking operation. */
43069 static rtx
43070 alignment_mask (rtx_insn *insn)
43072 rtx body = PATTERN (insn);
43074 if (GET_CODE (body) != SET
43075 || GET_CODE (SET_SRC (body)) != AND
43076 || !REG_P (XEXP (SET_SRC (body), 0)))
43077 return 0;
43079 rtx mask = XEXP (SET_SRC (body), 1);
43081 if (GET_CODE (mask) == CONST_INT)
43083 if (INTVAL (mask) == -16)
43084 return alignment_with_canonical_addr (SET_SRC (body));
43085 else
43086 return 0;
43089 if (!REG_P (mask))
43090 return 0;
43092 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43093 df_ref use;
43094 rtx real_mask = 0;
43096 FOR_EACH_INSN_INFO_USE (use, insn_info)
43098 if (!rtx_equal_p (DF_REF_REG (use), mask))
43099 continue;
43101 struct df_link *def_link = DF_REF_CHAIN (use);
43102 if (!def_link || def_link->next)
43103 return 0;
43105 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
43106 rtx const_body = PATTERN (const_insn);
43107 if (GET_CODE (const_body) != SET)
43108 return 0;
43110 real_mask = SET_SRC (const_body);
43112 if (GET_CODE (real_mask) != CONST_INT
43113 || INTVAL (real_mask) != -16)
43114 return 0;
43117 if (real_mask == 0)
43118 return 0;
43120 return alignment_with_canonical_addr (SET_SRC (body));
43123 /* Given INSN that's a load or store based at BASE_REG, look for a
43124 feeding computation that aligns its address on a 16-byte boundary. */
43125 static rtx
43126 find_alignment_op (rtx_insn *insn, rtx base_reg)
43128 df_ref base_use;
43129 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43130 rtx and_operation = 0;
43132 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
43134 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
43135 continue;
43137 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
43138 if (!base_def_link || base_def_link->next)
43139 break;
43141 /* With stack-protector code enabled, and possibly in other
43142 circumstances, there may not be an associated insn for
43143 the def. */
43144 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
43145 break;
43147 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
43148 and_operation = alignment_mask (and_insn);
43149 if (and_operation != 0)
43150 break;
43153 return and_operation;
43156 struct del_info { bool replace; rtx_insn *replace_insn; };
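/* Sketch of the transformation: the lvx builtin initially expands
   to roughly

     base = addr & -16                       (alignment masking)
     tmp  = vec_select (mem (base)) [...]    (permuting load)
     y    = vec_select (tmp) [...]           (register swap)

   The permuting load is rewritten as a plain load from
   (and addr (const_int -16)) and the trailing swap becomes a copy,
   the form the lvx/stvx insn patterns recognize.  */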
43158 /* If INSN is the load for an lvx pattern, put it in canonical form. */
43159 static void
43160 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
43162 rtx body = PATTERN (insn);
43163 gcc_assert (GET_CODE (body) == SET
43164 && GET_CODE (SET_SRC (body)) == VEC_SELECT
43165 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
43167 rtx mem = XEXP (SET_SRC (body), 0);
43168 rtx base_reg = XEXP (mem, 0);
43170 rtx and_operation = find_alignment_op (insn, base_reg);
43172 if (and_operation != 0)
43174 df_ref def;
43175 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43176 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43178 struct df_link *link = DF_REF_CHAIN (def);
43179 if (!link || link->next)
43180 break;
43182 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43183 if (!insn_is_swap_p (swap_insn)
43184 || insn_is_load_p (swap_insn)
43185 || insn_is_store_p (swap_insn))
43186 break;
43188 /* Expected lvx pattern found. Change the swap to
43189 a copy, and propagate the AND operation into the
43190 load. */
43191 to_delete[INSN_UID (swap_insn)].replace = true;
43192 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43194 XEXP (mem, 0) = and_operation;
43195 SET_SRC (body) = mem;
43196 INSN_CODE (insn) = -1; /* Force re-recognition. */
43197 df_insn_rescan (insn);
43199 if (dump_file)
43200 fprintf (dump_file, "lvx opportunity found at %d\n",
43201 INSN_UID (insn));
43206 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43207 static void
43208 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
43210 rtx body = PATTERN (insn);
43211 gcc_assert (GET_CODE (body) == SET
43212 && GET_CODE (SET_DEST (body)) == MEM
43213 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
43214 rtx mem = SET_DEST (body);
43215 rtx base_reg = XEXP (mem, 0);
43217 rtx and_operation = find_alignment_op (insn, base_reg);
43219 if (and_operation != 0)
43221 rtx src_reg = XEXP (SET_SRC (body), 0);
43222 df_ref src_use;
43223 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43224 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
43226 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
43227 continue;
43229 struct df_link *link = DF_REF_CHAIN (src_use);
43230 if (!link || link->next)
43231 break;
43233 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43234 if (!insn_is_swap_p (swap_insn)
43235 || insn_is_load_p (swap_insn)
43236 || insn_is_store_p (swap_insn))
43237 break;
43239 /* Expected stvx pattern found. Change the swap to
43240 a copy, and propagate the AND operation into the
43241 store. */
43242 to_delete[INSN_UID (swap_insn)].replace = true;
43243 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43245 XEXP (mem, 0) = and_operation;
43246 SET_SRC (body) = src_reg;
43247 INSN_CODE (insn) = -1; /* Force re-recognition. */
43248 df_insn_rescan (insn);
43250 if (dump_file)
43251 fprintf (dump_file, "stvx opportunity found at %d\n",
43252 INSN_UID (insn));
43257 /* Look for patterns created from builtin lvx and stvx calls, and
43258 canonicalize them to be properly recognized as such. */
43259 static void
43260 recombine_lvx_stvx_patterns (function *fun)
43262 int i;
43263 basic_block bb;
43264 rtx_insn *insn;
43266 int num_insns = get_max_uid ();
43267 del_info *to_delete = XCNEWVEC (del_info, num_insns);
43269 FOR_ALL_BB_FN (bb, fun)
43270 FOR_BB_INSNS (bb, insn)
43272 if (!NONDEBUG_INSN_P (insn))
43273 continue;
43275 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
43276 recombine_lvx_pattern (insn, to_delete);
43277 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
43278 recombine_stvx_pattern (insn, to_delete);
43281 /* Turning swaps into copies is delayed until now, to avoid problems
43282 with deleting instructions during the insn walk. */
43283 for (i = 0; i < num_insns; i++)
43284 if (to_delete[i].replace)
43286 rtx swap_body = PATTERN (to_delete[i].replace_insn);
43287 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
43288 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
43289 rtx_insn *new_insn = emit_insn_before (copy,
43290 to_delete[i].replace_insn);
43291 set_block_for_insn (new_insn,
43292 BLOCK_FOR_INSN (to_delete[i].replace_insn));
43293 df_insn_rescan (new_insn);
43294 df_insn_delete (to_delete[i].replace_insn);
43295 remove_insn (to_delete[i].replace_insn);
43296 to_delete[i].replace_insn->set_deleted ();
43299 free (to_delete);
43302 /* Main entry point for this pass. */
43303 unsigned int
43304 rs6000_analyze_swaps (function *fun)
43306 swap_web_entry *insn_entry;
43307 basic_block bb;
43308 rtx_insn *insn, *curr_insn = 0;
43310 /* Dataflow analysis for use-def chains. */
43311 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
43312 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
43313 df_analyze ();
43314 df_set_flags (DF_DEFER_INSN_RESCAN);
43316 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43317 recombine_lvx_stvx_patterns (fun);
43319 /* Allocate structure to represent webs of insns. */
43320 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
43322 /* Walk the insns to gather basic data. */
43323 FOR_ALL_BB_FN (bb, fun)
43324 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
43326 unsigned int uid = INSN_UID (insn);
43327 if (NONDEBUG_INSN_P (insn))
43329 insn_entry[uid].insn = insn;
43331 if (GET_CODE (insn) == CALL_INSN)
43332 insn_entry[uid].is_call = 1;
43334 /* Walk the uses and defs to see if we mention vector regs.
43335 Record any constraints on optimization of such mentions. */
43336 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43337 df_ref mention;
43338 FOR_EACH_INSN_INFO_USE (mention, insn_info)
43340 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43341 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43343 /* If a use gets its value from a call insn, it will be
43344 a hard register and will look like (reg:V4SI 3 3).
43345 The df analysis creates two mentions for GPR3 and GPR4,
43346 both DImode. We must recognize this and treat it as a
43347 vector mention to ensure the call is unioned with this
43348 use. */
43349 if (mode == DImode && DF_REF_INSN_INFO (mention))
43351 rtx feeder = DF_REF_INSN (mention);
43352 /* FIXME: It is pretty hard to get from the df mention
43353 to the mode of the use in the insn. We arbitrarily
43354 pick a vector mode here, even though the use might
43355 be a real DImode. We can be too conservative
43356 (create a web larger than necessary) because of
43357 this, so consider eventually fixing this. */
43358 if (GET_CODE (feeder) == CALL_INSN)
43359 mode = V4SImode;
43362 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43364 insn_entry[uid].is_relevant = 1;
43365 if (mode == TImode || mode == V1TImode
43366 || FLOAT128_VECTOR_P (mode))
43367 insn_entry[uid].is_128_int = 1;
43368 if (DF_REF_INSN_INFO (mention))
43369 insn_entry[uid].contains_subreg
43370 = !rtx_equal_p (DF_REF_REG (mention),
43371 DF_REF_REAL_REG (mention));
43372 union_defs (insn_entry, insn, mention);
43375 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
43377 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43378 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43380 /* If we're loading up a hard vector register for a call,
43381 it looks like (set (reg:V4SI 9 9) (...)). The df
43382 analysis creates two mentions for GPR9 and GPR10, both
43383 DImode. So relying on the mode from the mentions
43384 isn't sufficient to ensure we union the call into the
43385 web with the parameter setup code. */
43386 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
43387 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
43388 mode = GET_MODE (SET_DEST (PATTERN (insn)));
43390 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43392 insn_entry[uid].is_relevant = 1;
43393 if (mode == TImode || mode == V1TImode
43394 || FLOAT128_VECTOR_P (mode))
43395 insn_entry[uid].is_128_int = 1;
43396 if (DF_REF_INSN_INFO (mention))
43397 insn_entry[uid].contains_subreg
43398 = !rtx_equal_p (DF_REF_REG (mention),
43399 DF_REF_REAL_REG (mention));
43400 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43401 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
43402 insn_entry[uid].is_live_out = 1;
43403 union_uses (insn_entry, insn, mention);
43407 if (insn_entry[uid].is_relevant)
43409 /* Determine if this is a load or store. */
43410 insn_entry[uid].is_load = insn_is_load_p (insn);
43411 insn_entry[uid].is_store = insn_is_store_p (insn);
43413 /* Determine if this is a doubleword swap. If not,
43414 determine whether it can legally be swapped. */
43415 if (insn_is_swap_p (insn))
43416 insn_entry[uid].is_swap = 1;
43417 else
43419 unsigned int special = SH_NONE;
43420 insn_entry[uid].is_swappable
43421 = insn_is_swappable_p (insn_entry, insn, &special);
43422 if (special != SH_NONE && insn_entry[uid].contains_subreg)
43423 insn_entry[uid].is_swappable = 0;
43424 else if (special != SH_NONE)
43425 insn_entry[uid].special_handling = special;
43426 else if (insn_entry[uid].contains_subreg)
43427 insn_entry[uid].special_handling = SH_SUBREG;
43433 if (dump_file)
43435 fprintf (dump_file, "\nSwap insn entry table when first built\n");
43436 dump_swap_insn_table (insn_entry);
43439 /* Record unoptimizable webs. */
43440 unsigned e = get_max_uid (), i;
43441 for (i = 0; i < e; ++i)
43443 if (!insn_entry[i].is_relevant)
43444 continue;
43446 swap_web_entry *root
43447 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
43449 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
43450 || (insn_entry[i].contains_subreg
43451 && insn_entry[i].special_handling != SH_SUBREG)
43452 || insn_entry[i].is_128_int || insn_entry[i].is_call
43453 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
43454 root->web_not_optimizable = 1;
43456 /* If we have loads or stores that aren't permuting then the
43457 optimization isn't appropriate. */
43458 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
43459 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
43460 root->web_not_optimizable = 1;
43462 /* If we have permuting loads or stores that are not accompanied
43463 by a register swap, the optimization isn't appropriate. */
43464 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
43466 rtx insn = insn_entry[i].insn;
43467 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43468 df_ref def;
43470 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43472 struct df_link *link = DF_REF_CHAIN (def);
43474 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
43476 root->web_not_optimizable = 1;
43477 break;
43481 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
43483 rtx insn = insn_entry[i].insn;
43484 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43485 df_ref use;
43487 FOR_EACH_INSN_INFO_USE (use, insn_info)
43489 struct df_link *link = DF_REF_CHAIN (use);
43491 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
43493 root->web_not_optimizable = 1;
43494 break;
43500 if (dump_file)
43502 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
43503 dump_swap_insn_table (insn_entry);
43506 /* For each load and store in an optimizable web (which implies
43507 the loads and stores are permuting), find the associated
43508 register swaps and mark them for removal. Due to various
43509 optimizations we may mark the same swap more than once. Also
43510 perform special handling for swappable insns that require it. */
43511 for (i = 0; i < e; ++i)
43512 if ((insn_entry[i].is_load || insn_entry[i].is_store)
43513 && insn_entry[i].is_swap)
43515 swap_web_entry* root_entry
43516 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43517 if (!root_entry->web_not_optimizable)
43518 mark_swaps_for_removal (insn_entry, i);
43520 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
43522 swap_web_entry* root_entry
43523 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
43524 if (!root_entry->web_not_optimizable)
43525 handle_special_swappables (insn_entry, i);
43528 /* Now delete the swaps marked for removal. */
43529 for (i = 0; i < e; ++i)
43530 if (insn_entry[i].will_delete)
43531 replace_swap_with_copy (insn_entry, i);
43533 /* Clean up. */
43534 free (insn_entry);
43535 return 0;
43538 const pass_data pass_data_analyze_swaps =
43540 RTL_PASS, /* type */
43541 "swaps", /* name */
43542 OPTGROUP_NONE, /* optinfo_flags */
43543 TV_NONE, /* tv_id */
43544 0, /* properties_required */
43545 0, /* properties_provided */
43546 0, /* properties_destroyed */
43547 0, /* todo_flags_start */
43548 TODO_df_finish, /* todo_flags_finish */
43551 class pass_analyze_swaps : public rtl_opt_pass
43553 public:
43554 pass_analyze_swaps(gcc::context *ctxt)
43555 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
43558 /* opt_pass methods: */
43559 virtual bool gate (function *)
43561 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
43562 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
43565 virtual unsigned int execute (function *fun)
43567 return rs6000_analyze_swaps (fun);
43570 opt_pass *clone ()
43572 return new pass_analyze_swaps (m_ctxt);
43575 }; // class pass_analyze_swaps
43577 rtl_opt_pass *
43578 make_pass_analyze_swaps (gcc::context *ctxt)
43580 return new pass_analyze_swaps (ctxt);
43583 #ifdef RS6000_GLIBC_ATOMIC_FENV
43584 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
43585 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
43586 #endif
43588 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
43590 static void
43591 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
43593 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
43595 #ifdef RS6000_GLIBC_ATOMIC_FENV
43596 if (atomic_hold_decl == NULL_TREE)
43598 atomic_hold_decl
43599 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43600 get_identifier ("__atomic_feholdexcept"),
43601 build_function_type_list (void_type_node,
43602 double_ptr_type_node,
43603 NULL_TREE));
43604 TREE_PUBLIC (atomic_hold_decl) = 1;
43605 DECL_EXTERNAL (atomic_hold_decl) = 1;
43608 if (atomic_clear_decl == NULL_TREE)
43610 atomic_clear_decl
43611 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43612 get_identifier ("__atomic_feclearexcept"),
43613 build_function_type_list (void_type_node,
43614 NULL_TREE));
43615 TREE_PUBLIC (atomic_clear_decl) = 1;
43616 DECL_EXTERNAL (atomic_clear_decl) = 1;
43619 tree const_double = build_qualified_type (double_type_node,
43620 TYPE_QUAL_CONST);
43621 tree const_double_ptr = build_pointer_type (const_double);
43622 if (atomic_update_decl == NULL_TREE)
43624 atomic_update_decl
43625 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
43626 get_identifier ("__atomic_feupdateenv"),
43627 build_function_type_list (void_type_node,
43628 const_double_ptr,
43629 NULL_TREE));
43630 TREE_PUBLIC (atomic_update_decl) = 1;
43631 DECL_EXTERNAL (atomic_update_decl) = 1;
43634 tree fenv_var = create_tmp_var_raw (double_type_node);
43635 TREE_ADDRESSABLE (fenv_var) = 1;
43636 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
43638 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
43639 *clear = build_call_expr (atomic_clear_decl, 0);
43640 *update = build_call_expr (atomic_update_decl, 1,
43641 fold_convert (const_double_ptr, fenv_addr));
43642 #endif
43643 return;
43646 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
43647 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
43648 tree call_mffs = build_call_expr (mffs, 0);
43650 /* Generates the equivalent of feholdexcept (&fenv_var)
43652 *fenv_var = __builtin_mffs ();
43653 double fenv_hold;
43654 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
43655 __builtin_mtfsf (0xff, fenv_hold); */
43657 /* Mask to clear everything except for the rounding modes and non-IEEE
43658 arithmetic flag. */
43659 const unsigned HOST_WIDE_INT hold_exception_mask =
43660 HOST_WIDE_INT_C (0xffffffff00000007);
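/* In the image produced by mffs, the low three bits of the FPSCR
   word are the non-IEEE mode bit (NI) and the two rounding-mode
   bits (RN), so this mask keeps those plus the entire high word
   and clears all exception status and enable bits.  */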
43662 tree fenv_var = create_tmp_var_raw (double_type_node);
43664 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
43666 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
43667 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43668 build_int_cst (uint64_type_node,
43669 hold_exception_mask));
43671 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43672 fenv_llu_and);
43674 tree hold_mtfsf = build_call_expr (mtfsf, 2,
43675 build_int_cst (unsigned_type_node, 0xff),
43676 fenv_hold_mtfsf);
43678 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
43680 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
43682 double fenv_clear = __builtin_mffs ();
43683 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
43684 __builtin_mtfsf (0xff, fenv_clear); */
43686 /* Mask to clear the entire FPSCR image, including the rounding
43687 modes and the non-IEEE arithmetic flag. */
43688 const unsigned HOST_WIDE_INT clear_exception_mask =
43689 HOST_WIDE_INT_C (0xffffffff00000000);
43691 tree fenv_clear = create_tmp_var_raw (double_type_node);
43693 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
43695 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
43696 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
43697 fenv_clean_llu,
43698 build_int_cst (uint64_type_node,
43699 clear_exception_mask));
43701 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43702 fenv_clear_llu_and);
43704 tree clear_mtfsf = build_call_expr (mtfsf, 2,
43705 build_int_cst (unsigned_type_node, 0xff),
43706 fenv_clear_mtfsf);
43708 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
43710 /* Generates the equivalent of feupdateenv (&fenv_var)
43712 double old_fenv = __builtin_mffs ();
43713 double fenv_update;
43714 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
43715 (*(uint64_t*)fenv_var & 0x1ff80fff);
43716 __builtin_mtfsf (0xff, fenv_update); */
43718 const unsigned HOST_WIDE_INT update_exception_mask =
43719 HOST_WIDE_INT_C (0xffffffff1fffff00);
43720 const unsigned HOST_WIDE_INT new_exception_mask =
43721 HOST_WIDE_INT_C (0x1ff80fff);
43723 tree old_fenv = create_tmp_var_raw (double_type_node);
43724 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
43726 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
43727 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
43728 build_int_cst (uint64_type_node,
43729 update_exception_mask));
43731 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
43732 build_int_cst (uint64_type_node,
43733 new_exception_mask));
43735 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
43736 old_llu_and, new_llu_and);
43738 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
43739 new_llu_mask);
43741 tree update_mtfsf = build_call_expr (mtfsf, 2,
43742 build_int_cst (unsigned_type_node, 0xff),
43743 fenv_update_mtfsf);
43745 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
43748 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
43750 static bool
43751 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
43752 optimization_type opt_type)
43754 switch (op)
43756 case rsqrt_optab:
43757 return (opt_type == OPTIMIZE_FOR_SPEED
43758 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
43760 default:
43761 return true;
43765 /* Implement TARGET_CONSTANT_ALIGNMENT. */
43767 static HOST_WIDE_INT
43768 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
43770 if (TREE_CODE (exp) == STRING_CST
43771 && (STRICT_ALIGNMENT || !optimize_size))
43772 return MAX (align, BITS_PER_WORD);
43773 return align;
43776 /* Implement TARGET_STARTING_FRAME_OFFSET. */
43778 static HOST_WIDE_INT
43779 rs6000_starting_frame_offset (void)
43781 if (FRAME_GROWS_DOWNWARD)
43782 return 0;
43783 return RS6000_STARTING_FRAME_OFFSET;
43786 struct gcc_target targetm = TARGET_INITIALIZER;
43788 #include "gt-powerpcspe.h"