/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, to call to so that
   we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use a variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV        = 0x001,  /* Use divide estimate */
  RECIP_DF_DIV        = 0x002,
  RECIP_V4SF_DIV      = 0x004,
  RECIP_V2DF_DIV      = 0x008,

  RECIP_SF_RSQRT      = 0x010,  /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT      = 0x020,
  RECIP_V4SF_RSQRT    = 0x040,
  RECIP_V2DF_RSQRT    = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE          = 0,
  RECIP_ALL           = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                         | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                         | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",       RECIP_ALL },
  { "none",      RECIP_NONE },
  { "div",       (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                  | RECIP_V2DF_DIV) },
  { "divf",      (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",      (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",     (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",    (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",    (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",     PPC_FEATURE_HAS_4xxMAC,      0 },
  { "altivec",    PPC_FEATURE_HAS_ALTIVEC,     0 },
  { "arch_2_05",  PPC_FEATURE_ARCH_2_05,       0 },
  { "arch_2_06",  PPC_FEATURE_ARCH_2_06,       0 },
  { "archpmu",    PPC_FEATURE_PERFMON_COMPAT,  0 },
  { "booke",      PPC_FEATURE_BOOKE,           0 },
  { "cellbe",     PPC_FEATURE_CELL_BE,         0 },
  { "dfp",        PPC_FEATURE_HAS_DFP,         0 },
  { "efpdouble",  PPC_FEATURE_HAS_EFP_DOUBLE,  0 },
  { "efpsingle",  PPC_FEATURE_HAS_EFP_SINGLE,  0 },
  { "fpu",        PPC_FEATURE_HAS_FPU,         0 },
  { "ic_snoop",   PPC_FEATURE_ICACHE_SNOOP,    0 },
  { "mmu",        PPC_FEATURE_HAS_MMU,         0 },
  { "notb",       PPC_FEATURE_NO_TB,           0 },
  { "pa6t",       PPC_FEATURE_PA6T,            0 },
  { "power4",     PPC_FEATURE_POWER4,          0 },
  { "power5",     PPC_FEATURE_POWER5,          0 },
  { "power5+",    PPC_FEATURE_POWER5_PLUS,     0 },
  { "power6x",    PPC_FEATURE_POWER6_EXT,      0 },
  { "ppc32",      PPC_FEATURE_32,              0 },
  { "ppc601",     PPC_FEATURE_601_INSTR,       0 },
  { "ppc64",      PPC_FEATURE_64,              0 },
  { "ppcle",      PPC_FEATURE_PPC_LE,          0 },
  { "smt",        PPC_FEATURE_SMT,             0 },
  { "spe",        PPC_FEATURE_HAS_SPE,         0 },
  { "true_le",    PPC_FEATURE_TRUE_LE,         0 },
  { "ucache",     PPC_FEATURE_UNIFIED_CACHE,   0 },
  { "vsx",        PPC_FEATURE_HAS_VSX,         0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",  PPC_FEATURE2_ARCH_2_07,      1 },
  { "dscr",       PPC_FEATURE2_HAS_DSCR,       1 },
  { "ebb",        PPC_FEATURE2_HAS_EBB,        1 },
  { "htm",        PPC_FEATURE2_HAS_HTM,        1 },
  { "htm-nosc",   PPC_FEATURE2_HTM_NOSC,       1 },
  { "isel",       PPC_FEATURE2_HAS_ISEL,       1 },
  { "tar",        PPC_FEATURE2_HAS_TAR,        1 },
  { "vcrypto",    PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00",  PPC_FEATURE2_ARCH_3_00,      1 },
  { "ieee128",    PPC_FEATURE2_HAS_IEEE128,    1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,    /* General purpose registers.  */
  RELOAD_REG_FPR,    /* Traditional floating point regs.  */
  RELOAD_REG_VMX,    /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,    /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the three register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;  /* Register class name.  */
  int reg;           /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any", -1 },                   /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET 0x80  /* quad offset is limited.  */
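
/* For example (an editorial illustration, not code used elsewhere in this
   file): a mode whose mask is

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET

   is valid in the register class and supports both reg+reg and reg+offset
   addressing, but none of the auto-update forms, so

     (mask & RELOAD_REG_INDEXED) != 0      yields true, while
     (mask & RELOAD_REG_PRE_INCDEC) != 0   yields false.

   The mode_supports_* helpers below make exactly this kind of query
   through reg_addr[].  */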
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads.  */
                                  /* INSNs for fusing addi with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
                                  /* INSNs for fusing addis with loads
                                     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
  bool fused_toc;                 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];   /* return value + 3 arguments.  */
  unsigned char uns_p[4]; /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq",   "lr",   "ctr",  "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
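
/* For example (an editorial note): ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO)
   yields 0x80000000 for %v0, and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31)
   yields 0x00000001 for %v31, matching the VRSAVE convention above.  */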
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1673 #undef TARGET_DWARF_REGISTER_SPAN
1674 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1676 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1677 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1679 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1680 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1682 #undef TARGET_PROMOTE_FUNCTION_MODE
1683 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1685 #undef TARGET_RETURN_IN_MEMORY
1686 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1688 #undef TARGET_RETURN_IN_MSB
1689 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1691 #undef TARGET_SETUP_INCOMING_VARARGS
1692 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1694 /* Always strict argument naming on rs6000. */
1695 #undef TARGET_STRICT_ARGUMENT_NAMING
1696 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1697 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1698 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1699 #undef TARGET_SPLIT_COMPLEX_ARG
1700 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1701 #undef TARGET_MUST_PASS_IN_STACK
1702 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1703 #undef TARGET_PASS_BY_REFERENCE
1704 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1705 #undef TARGET_ARG_PARTIAL_BYTES
1706 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1707 #undef TARGET_FUNCTION_ARG_ADVANCE
1708 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1709 #undef TARGET_FUNCTION_ARG
1710 #define TARGET_FUNCTION_ARG rs6000_function_arg
1711 #undef TARGET_FUNCTION_ARG_BOUNDARY
1712 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1714 #undef TARGET_BUILD_BUILTIN_VA_LIST
1715 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1717 #undef TARGET_EXPAND_BUILTIN_VA_START
1718 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1720 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1721 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1723 #undef TARGET_EH_RETURN_FILTER_MODE
1724 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1727 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1729 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1730 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1732 #undef TARGET_FLOATN_MODE
1733 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1735 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1736 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1738 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1739 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1741 #undef TARGET_MD_ASM_ADJUST
1742 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1744 #undef TARGET_OPTION_OVERRIDE
1745 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1749 rs6000_builtin_vectorized_function
1751 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1752 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1753 rs6000_builtin_md_vectorized_function
1755 #ifdef TARGET_THREAD_SSP_OFFSET
1756 #undef TARGET_STACK_PROTECT_GUARD
1757 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
1758 #endif
1760 #if !TARGET_MACHO
1761 #undef TARGET_STACK_PROTECT_FAIL
1762 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1763 #endif
1765 #ifdef HAVE_AS_TLS
1766 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1767 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1768 #endif
1770 /* Use a 32-bit anchor range. This leads to sequences like:
1772 addis tmp,anchor,high
1773 add dest,tmp,low
1775 where tmp itself acts as an anchor, and can be shared between
1776 accesses to the same 64k page. */
1777 #undef TARGET_MIN_ANCHOR_OFFSET
1778 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1779 #undef TARGET_MAX_ANCHOR_OFFSET
1780 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
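/* Concretely, two statics placed in the same anchor block, say at
   anchor+0x10 and anchor+0x18, can both be reached with D-form loads
   off the single tmp computed by the addis in the comment above.  */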
1781 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1782 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1783 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1784 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1786 #undef TARGET_BUILTIN_RECIPROCAL
1787 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1789 #undef TARGET_EXPAND_TO_RTL_HOOK
1790 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1792 #undef TARGET_INSTANTIATE_DECLS
1793 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1795 #undef TARGET_SECONDARY_RELOAD
1796 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1798 #undef TARGET_LEGITIMATE_ADDRESS_P
1799 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1801 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1802 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1804 #undef TARGET_LRA_P
1805 #define TARGET_LRA_P rs6000_lra_p
1807 #undef TARGET_CAN_ELIMINATE
1808 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1810 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1811 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1813 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1814 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1816 #undef TARGET_TRAMPOLINE_INIT
1817 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1819 #undef TARGET_FUNCTION_VALUE
1820 #define TARGET_FUNCTION_VALUE rs6000_function_value
1822 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1823 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1825 #undef TARGET_OPTION_SAVE
1826 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1828 #undef TARGET_OPTION_RESTORE
1829 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1831 #undef TARGET_OPTION_PRINT
1832 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1834 #undef TARGET_CAN_INLINE_P
1835 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1837 #undef TARGET_SET_CURRENT_FUNCTION
1838 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1840 #undef TARGET_LEGITIMATE_CONSTANT_P
1841 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1843 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1844 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1846 #undef TARGET_CAN_USE_DOLOOP_P
1847 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1849 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1850 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1852 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1853 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1854 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1855 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1856 #undef TARGET_UNWIND_WORD_MODE
1857 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1859 #undef TARGET_OFFLOAD_OPTIONS
1860 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1862 #undef TARGET_C_MODE_FOR_SUFFIX
1863 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1865 #undef TARGET_INVALID_BINARY_OP
1866 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1868 #undef TARGET_OPTAB_SUPPORTED_P
1869 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1871 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1872 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1875 /* Processor table. */
1876 struct rs6000_ptt
1878 const char *const name; /* Canonical processor name. */
1879 const enum processor_type processor; /* Processor type enum value. */
1880 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1883 static struct rs6000_ptt const processor_target_table[] =
1885 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1886 #include "rs6000-cpus.def"
1887 #undef RS6000_CPU
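/* The three lines above are the classic "X-macro" idiom: each
   RS6000_CPU (NAME, CPU, FLAGS) entry in rs6000-cpus.def expands to
   one initializer of the table.  A minimal sketch of the pattern (the
   entries here are illustrative, not the actual contents of
   rs6000-cpus.def):

     #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
     static struct rs6000_ptt const example_table[] = {
       RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64)
       RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64)
     };
     #undef RS6000_CPU
 */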
1890 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1891 name is invalid. */
1893 static int
1894 rs6000_cpu_name_lookup (const char *name)
1896 size_t i;
1898 if (name != NULL)
1900 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1901 if (! strcmp (name, processor_target_table[i].name))
1902 return (int)i;
1905 return -1;
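/* Usage example (illustrative): assuming "power9" is an entry in
   rs6000-cpus.def, rs6000_cpu_name_lookup ("power9") returns its index
   in processor_target_table, while rs6000_cpu_name_lookup ("power99")
   and rs6000_cpu_name_lookup (NULL) both return -1.  */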
1909 /* Return number of consecutive hard regs needed starting at reg REGNO
1910 to hold something of mode MODE.
1911 This is ordinarily the length in words of a value of mode MODE
1912 but can be less for certain modes in special long registers.
1914 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1915 scalar instructions. The upper 32 bits are only available to the
1916 SIMD instructions.
1918 POWER and PowerPC GPRs hold 32 bits worth;
1919 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1921 static int
1922 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1924 unsigned HOST_WIDE_INT reg_size;
1926 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1927 128-bit floating point that can go in vector registers, which has VSX
1928 memory addressing. */
1929 if (FP_REGNO_P (regno))
1930 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1931 ? UNITS_PER_VSX_WORD
1932 : UNITS_PER_FP_WORD);
1934 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1935 reg_size = UNITS_PER_SPE_WORD;
1937 else if (ALTIVEC_REGNO_P (regno))
1938 reg_size = UNITS_PER_ALTIVEC_WORD;
1940 /* The value returned for SCmode in the E500 double case is 2 for
1941 ABI compatibility; storing an SCmode value in a single register
1942 would require function_arg and rs6000_spe_function_arg to handle
1943 SCmode so as to pass the value correctly in a pair of
1944 registers. */
1945 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1946 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1947 reg_size = UNITS_PER_FP_WORD;
1949 else
1950 reg_size = UNITS_PER_WORD;
1952 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
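/* Worked example of the ceiling division above (the sizes used here
   are the usual ones, but depend on the target): a 16-byte vector mode
   held in 32-bit GPRs (reg_size == 4) needs (16 + 4 - 1) / 4 == 4
   consecutive registers, while an 8-byte DFmode value in an FPR
   (reg_size == 8) needs (8 + 8 - 1) / 8 == 1.  */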
1955 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1956 MODE. */
1957 static int
1958 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1960 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1962 if (COMPLEX_MODE_P (mode))
1963 mode = GET_MODE_INNER (mode);
1965 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1966 register combinations, and we use PTImode where we need to deal with such
1967 operations. Don't allow quad words in the argument or frame
1968 pointer registers, just registers 0..31. */
1969 if (mode == PTImode)
1970 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1971 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1972 && ((regno & 1) == 0));
1974 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1975 implementations. Don't allow an item to be split between a FP register
1976 and an Altivec register. Allow TImode in all VSX registers if the user
1977 asked for it. */
1978 if (TARGET_VSX && VSX_REGNO_P (regno)
1979 && (VECTOR_MEM_VSX_P (mode)
1980 || FLOAT128_VECTOR_P (mode)
1981 || reg_addr[mode].scalar_in_vmx_p
1982 || (TARGET_VSX_TIMODE && mode == TImode)
1983 || (TARGET_VADDUQM && mode == V1TImode)))
1985 if (FP_REGNO_P (regno))
1986 return FP_REGNO_P (last_regno);
1988 if (ALTIVEC_REGNO_P (regno))
1990 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1991 return 0;
1993 return ALTIVEC_REGNO_P (last_regno);
1997 /* The GPRs can hold any mode, but values bigger than one register
1998 cannot go past R31. */
1999 if (INT_REGNO_P (regno))
2000 return INT_REGNO_P (last_regno);
2002 /* The float registers (except for VSX vector modes) can only hold floating
2003 modes and DImode. */
2004 if (FP_REGNO_P (regno))
2006 if (FLOAT128_VECTOR_P (mode))
2007 return false;
2009 if (SCALAR_FLOAT_MODE_P (mode)
2010 && (mode != TDmode || (regno % 2) == 0)
2011 && FP_REGNO_P (last_regno))
2012 return 1;
2014 if (GET_MODE_CLASS (mode) == MODE_INT)
2016 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD
2017 return 1;
2019 if (TARGET_VSX_SMALL_INTEGER && mode == SImode)
2020 return 1;
2023 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2024 && PAIRED_VECTOR_MODE (mode))
2025 return 1;
2027 return 0;
2030 /* The CR register can only hold CC modes. */
2031 if (CR_REGNO_P (regno))
2032 return GET_MODE_CLASS (mode) == MODE_CC;
2034 if (CA_REGNO_P (regno))
2035 return mode == Pmode || mode == SImode;
2037 /* AltiVec only in AltiVec registers. */
2038 if (ALTIVEC_REGNO_P (regno))
2039 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2040 || mode == V1TImode);
2042 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2043 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2044 return 1;
2046 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2047 registers, and the value must fit within the register set. */
2049 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
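/* For instance, the PTImode check above accepts a quad word starting
   at r10 (the even/odd pair r10/r11 stays inside the GPRs) but rejects
   one starting at r11 (odd) or at r31 (odd, and the pair would run
   past the GPR file).  */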
2052 /* Print interesting facts about registers. */
2053 static void
2054 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2056 int r, m;
2058 for (r = first_regno; r <= last_regno; ++r)
2060 const char *comma = "";
2061 int len;
2063 if (first_regno == last_regno)
2064 fprintf (stderr, "%s:\t", reg_name);
2065 else
2066 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2068 len = 8;
2069 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2070 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2072 if (len > 70)
2074 fprintf (stderr, ",\n\t");
2075 len = 8;
2076 comma = "";
2079 if (rs6000_hard_regno_nregs[m][r] > 1)
2080 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2081 rs6000_hard_regno_nregs[m][r]);
2082 else
2083 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2085 comma = ", ";
2088 if (call_used_regs[r])
2090 if (len > 70)
2092 fprintf (stderr, ",\n\t");
2093 len = 8;
2094 comma = "";
2097 len += fprintf (stderr, "%s%s", comma, "call-used");
2098 comma = ", ";
2101 if (fixed_regs[r])
2103 if (len > 70)
2105 fprintf (stderr, ",\n\t");
2106 len = 8;
2107 comma = "";
2110 len += fprintf (stderr, "%s%s", comma, "fixed");
2111 comma = ", ";
2114 if (len > 70)
2116 fprintf (stderr, ",\n\t");
2117 comma = "";
2120 len += fprintf (stderr, "%sreg-class = %s", comma,
2121 reg_class_names[(int)rs6000_regno_regclass[r]]);
2122 comma = ", ";
2124 if (len > 70)
2126 fprintf (stderr, ",\n\t");
2127 comma = "";
2130 fprintf (stderr, "%sregno = %d\n", comma, r);
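/* The output produced above looks roughly like (illustrative; the
   exact mode list depends on the configuration):
   gr0:	QI, HI, SI, DI/2, ..., call-used, reg-class = GENERAL_REGS, regno = 0
   where "/N" marks modes needing N hard registers and lines are
   wrapped near column 70 by the len bookkeeping.  */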
2134 static const char *
2135 rs6000_debug_vector_unit (enum rs6000_vector v)
2137 const char *ret;
2139 switch (v)
2141 case VECTOR_NONE: ret = "none"; break;
2142 case VECTOR_ALTIVEC: ret = "altivec"; break;
2143 case VECTOR_VSX: ret = "vsx"; break;
2144 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2145 case VECTOR_PAIRED: ret = "paired"; break;
2146 case VECTOR_SPE: ret = "spe"; break;
2147 case VECTOR_OTHER: ret = "other"; break;
2148 default: ret = "unknown"; break;
2151 return ret;
2154 /* Inner function printing just the address mask for a particular reload
2155 register class. */
2156 DEBUG_FUNCTION char *
2157 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2159 static char ret[8];
2160 char *p = ret;
2162 if ((mask & RELOAD_REG_VALID) != 0)
2163 *p++ = 'v';
2164 else if (keep_spaces)
2165 *p++ = ' ';
2167 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2168 *p++ = 'm';
2169 else if (keep_spaces)
2170 *p++ = ' ';
2172 if ((mask & RELOAD_REG_INDEXED) != 0)
2173 *p++ = 'i';
2174 else if (keep_spaces)
2175 *p++ = ' ';
2177 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2178 *p++ = 'O';
2179 else if ((mask & RELOAD_REG_OFFSET) != 0)
2180 *p++ = 'o';
2181 else if (keep_spaces)
2182 *p++ = ' ';
2184 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2185 *p++ = '+';
2186 else if (keep_spaces)
2187 *p++ = ' ';
2189 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2190 *p++ = '+';
2191 else if (keep_spaces)
2192 *p++ = ' ';
2194 if ((mask & RELOAD_REG_AND_M16) != 0)
2195 *p++ = '&';
2196 else if (keep_spaces)
2197 *p++ = ' ';
2199 *p = '\0';
2201 return ret;
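/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set comes back as "v io   " when keep_spaces is
   true (one fixed column per flag) and as "vio" when it is false.  */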
2204 /* Print the address masks in a human readable fashion. */
2205 DEBUG_FUNCTION void
2206 rs6000_debug_print_mode (ssize_t m)
2208 ssize_t rc;
2209 int spaces = 0;
2210 bool fuse_extra_p;
2212 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2213 for (rc = 0; rc < N_RELOAD_REG; rc++)
2214 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2215 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2217 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2218 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2219 fprintf (stderr, " Reload=%c%c",
2220 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2221 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2222 else
2223 spaces += sizeof (" Reload=sl") - 1;
2225 if (reg_addr[m].scalar_in_vmx_p)
2227 fprintf (stderr, "%*s Upper=y", spaces, "");
2228 spaces = 0;
2230 else
2231 spaces += sizeof (" Upper=y") - 1;
2233 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2234 || reg_addr[m].fused_toc);
2235 if (!fuse_extra_p)
2237 for (rc = 0; rc < N_RELOAD_REG; rc++)
2239 if (rc != RELOAD_REG_ANY)
2241 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2243 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2244 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2245 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2247 fuse_extra_p = true;
2248 break;
2254 if (fuse_extra_p)
2256 fprintf (stderr, "%*s Fuse:", spaces, "");
2257 spaces = 0;
2259 for (rc = 0; rc < N_RELOAD_REG; rc++)
2261 if (rc != RELOAD_REG_ANY)
2263 char load, store;
2265 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2266 load = 'l';
2267 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2268 load = 'L';
2269 else
2270 load = '-';
2272 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2273 store = 's';
2274 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2275 store = 'S';
2276 else
2277 store = '-';
2279 if (load == '-' && store == '-')
2280 spaces += 5;
2281 else
2283 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2284 reload_reg_map[rc].name[0], load, store);
2285 spaces = 0;
2290 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2292 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2293 spaces = 0;
2295 else
2296 spaces += sizeof (" P8gpr") - 1;
2298 if (reg_addr[m].fused_toc)
2300 fprintf (stderr, "%*sToc", (spaces + 1), "");
2301 spaces = 0;
2303 else
2304 spaces += sizeof (" Toc") - 1;
2306 else
2307 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2309 if (rs6000_vector_unit[m] != VECTOR_NONE
2310 || rs6000_vector_mem[m] != VECTOR_NONE)
2312 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2313 spaces, "",
2314 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2315 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2318 fputs ("\n", stderr);
2321 #define DEBUG_FMT_ID "%-32s= "
2322 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2323 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2324 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
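/* As an illustration of the macros above,
   fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2")
   prints the key left-justified in a 32-column field followed by
   "= ELFv2": DEBUG_FMT_ID supplies the aligned "name = " prefix, and
   the _D/_WX/_S variants append a decimal int, a hex HOST_WIDE_INT,
   or a string.  */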
2326 /* Print various interesting information with -mdebug=reg. */
2327 static void
2328 rs6000_debug_reg_global (void)
2330 static const char *const tf[2] = { "false", "true" };
2331 const char *nl = (const char *)0;
2332 int m;
2333 size_t m1, m2, v;
2334 char costly_num[20];
2335 char nop_num[20];
2336 char flags_buffer[40];
2337 const char *costly_str;
2338 const char *nop_str;
2339 const char *trace_str;
2340 const char *abi_str;
2341 const char *cmodel_str;
2342 struct cl_target_option cl_opts;
2344 /* Modes we want tieable information on. */
2345 static const machine_mode print_tieable_modes[] = {
2346 QImode,
2347 HImode,
2348 SImode,
2349 DImode,
2350 TImode,
2351 PTImode,
2352 SFmode,
2353 DFmode,
2354 TFmode,
2355 IFmode,
2356 KFmode,
2357 SDmode,
2358 DDmode,
2359 TDmode,
2360 V8QImode,
2361 V4HImode,
2362 V2SImode,
2363 V16QImode,
2364 V8HImode,
2365 V4SImode,
2366 V2DImode,
2367 V1TImode,
2368 V32QImode,
2369 V16HImode,
2370 V8SImode,
2371 V4DImode,
2372 V2TImode,
2373 V2SFmode,
2374 V4SFmode,
2375 V2DFmode,
2376 V8SFmode,
2377 V4DFmode,
2378 CCmode,
2379 CCUNSmode,
2380 CCEQmode,
2383 /* Virtual regs we are interested in. */
2384 static const struct {
2385 int regno; /* register number. */
2386 const char *name; /* register name. */
2387 } virtual_regs[] = {
2388 { STACK_POINTER_REGNUM, "stack pointer:" },
2389 { TOC_REGNUM, "toc: " },
2390 { STATIC_CHAIN_REGNUM, "static chain: " },
2391 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2392 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2393 { ARG_POINTER_REGNUM, "arg pointer: " },
2394 { FRAME_POINTER_REGNUM, "frame pointer:" },
2395 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2396 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2397 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2398 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2399 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2400 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2401 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2402 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2403 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2406 fputs ("\nHard register information:\n", stderr);
2407 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2408 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2409 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2410 LAST_ALTIVEC_REGNO,
2411 "vs");
2412 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2413 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2414 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2415 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2416 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2417 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2418 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2419 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2421 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2422 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2423 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2425 fprintf (stderr,
2426 "\n"
2427 "d reg_class = %s\n"
2428 "f reg_class = %s\n"
2429 "v reg_class = %s\n"
2430 "wa reg_class = %s\n"
2431 "wb reg_class = %s\n"
2432 "wd reg_class = %s\n"
2433 "we reg_class = %s\n"
2434 "wf reg_class = %s\n"
2435 "wg reg_class = %s\n"
2436 "wh reg_class = %s\n"
2437 "wi reg_class = %s\n"
2438 "wj reg_class = %s\n"
2439 "wk reg_class = %s\n"
2440 "wl reg_class = %s\n"
2441 "wm reg_class = %s\n"
2442 "wo reg_class = %s\n"
2443 "wp reg_class = %s\n"
2444 "wq reg_class = %s\n"
2445 "wr reg_class = %s\n"
2446 "ws reg_class = %s\n"
2447 "wt reg_class = %s\n"
2448 "wu reg_class = %s\n"
2449 "wv reg_class = %s\n"
2450 "ww reg_class = %s\n"
2451 "wx reg_class = %s\n"
2452 "wy reg_class = %s\n"
2453 "wz reg_class = %s\n"
2454 "wH reg_class = %s\n"
2455 "wI reg_class = %s\n"
2456 "wJ reg_class = %s\n"
2457 "wK reg_class = %s\n"
2458 "\n",
2459 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2460 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2461 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2462 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2463 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2464 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2465 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2466 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2467 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2468 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2469 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2470 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2471 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2472 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2473 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2474 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2475 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2476 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2477 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2478 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2479 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2480 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2481 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2482 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2483 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2484 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2485 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2486 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2487 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2488 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2489 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2491 nl = "\n";
2492 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2493 rs6000_debug_print_mode (m);
2495 fputs ("\n", stderr);
2497 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2499 machine_mode mode1 = print_tieable_modes[m1];
2500 bool first_time = true;
2502 nl = (const char *)0;
2503 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2505 machine_mode mode2 = print_tieable_modes[m2];
2506 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2508 if (first_time)
2510 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2511 nl = "\n";
2512 first_time = false;
2515 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2519 if (!first_time)
2520 fputs ("\n", stderr);
2523 if (nl)
2524 fputs (nl, stderr);
2526 if (rs6000_recip_control)
2528 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2530 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2531 if (rs6000_recip_bits[m])
2533 fprintf (stderr,
2534 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2535 GET_MODE_NAME (m),
2536 (RS6000_RECIP_AUTO_RE_P (m)
2537 ? "auto"
2538 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2539 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2540 ? "auto"
2541 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2544 fputs ("\n", stderr);
2547 if (rs6000_cpu_index >= 0)
2549 const char *name = processor_target_table[rs6000_cpu_index].name;
2550 HOST_WIDE_INT flags
2551 = processor_target_table[rs6000_cpu_index].target_enable;
2553 sprintf (flags_buffer, "-mcpu=%s flags", name);
2554 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2556 else
2557 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2559 if (rs6000_tune_index >= 0)
2561 const char *name = processor_target_table[rs6000_tune_index].name;
2562 HOST_WIDE_INT flags
2563 = processor_target_table[rs6000_tune_index].target_enable;
2565 sprintf (flags_buffer, "-mtune=%s flags", name);
2566 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2568 else
2569 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2571 cl_target_option_save (&cl_opts, &global_options);
2572 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2573 rs6000_isa_flags);
2575 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2576 rs6000_isa_flags_explicit);
2578 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2579 rs6000_builtin_mask);
2581 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2583 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2584 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2586 switch (rs6000_sched_costly_dep)
2588 case max_dep_latency:
2589 costly_str = "max_dep_latency";
2590 break;
2592 case no_dep_costly:
2593 costly_str = "no_dep_costly";
2594 break;
2596 case all_deps_costly:
2597 costly_str = "all_deps_costly";
2598 break;
2600 case true_store_to_load_dep_costly:
2601 costly_str = "true_store_to_load_dep_costly";
2602 break;
2604 case store_to_load_dep_costly:
2605 costly_str = "store_to_load_dep_costly";
2606 break;
2608 default:
2609 costly_str = costly_num;
2610 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2611 break;
2614 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2616 switch (rs6000_sched_insert_nops)
2618 case sched_finish_regroup_exact:
2619 nop_str = "sched_finish_regroup_exact";
2620 break;
2622 case sched_finish_pad_groups:
2623 nop_str = "sched_finish_pad_groups";
2624 break;
2626 case sched_finish_none:
2627 nop_str = "sched_finish_none";
2628 break;
2630 default:
2631 nop_str = nop_num;
2632 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2633 break;
2636 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2638 switch (rs6000_sdata)
2640 default:
2641 case SDATA_NONE:
2642 break;
2644 case SDATA_DATA:
2645 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2646 break;
2648 case SDATA_SYSV:
2649 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2650 break;
2652 case SDATA_EABI:
2653 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2654 break;
2658 switch (rs6000_traceback)
2660 case traceback_default: trace_str = "default"; break;
2661 case traceback_none: trace_str = "none"; break;
2662 case traceback_part: trace_str = "part"; break;
2663 case traceback_full: trace_str = "full"; break;
2664 default: trace_str = "unknown"; break;
2667 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2669 switch (rs6000_current_cmodel)
2671 case CMODEL_SMALL: cmodel_str = "small"; break;
2672 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2673 case CMODEL_LARGE: cmodel_str = "large"; break;
2674 default: cmodel_str = "unknown"; break;
2677 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2679 switch (rs6000_current_abi)
2681 case ABI_NONE: abi_str = "none"; break;
2682 case ABI_AIX: abi_str = "aix"; break;
2683 case ABI_ELFv2: abi_str = "ELFv2"; break;
2684 case ABI_V4: abi_str = "V4"; break;
2685 case ABI_DARWIN: abi_str = "darwin"; break;
2686 default: abi_str = "unknown"; break;
2689 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2691 if (rs6000_altivec_abi)
2692 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2694 if (rs6000_spe_abi)
2695 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2697 if (rs6000_darwin64_abi)
2698 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2700 if (rs6000_float_gprs)
2701 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2703 fprintf (stderr, DEBUG_FMT_S, "fprs",
2704 (TARGET_FPRS ? "true" : "false"));
2706 fprintf (stderr, DEBUG_FMT_S, "single_float",
2707 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2709 fprintf (stderr, DEBUG_FMT_S, "double_float",
2710 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2712 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2713 (TARGET_SOFT_FLOAT ? "true" : "false"));
2715 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2716 (TARGET_E500_SINGLE ? "true" : "false"));
2718 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2719 (TARGET_E500_DOUBLE ? "true" : "false"));
2721 if (TARGET_LINK_STACK)
2722 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2724 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2726 if (TARGET_P8_FUSION)
2728 char options[80];
2730 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2731 if (TARGET_TOC_FUSION)
2732 strcat (options, ", toc");
2734 if (TARGET_P8_FUSION_SIGN)
2735 strcat (options, ", sign");
2737 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2740 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2741 TARGET_SECURE_PLT ? "secure" : "bss");
2742 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2743 aix_struct_return ? "aix" : "sysv");
2744 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2745 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2746 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2747 tf[!!rs6000_align_branch_targets]);
2748 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2749 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2750 rs6000_long_double_type_size);
2751 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2752 (int)rs6000_sched_restricted_insns_priority);
2753 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2754 (int)END_BUILTINS);
2755 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2756 (int)RS6000_BUILTIN_COUNT);
2758 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2759 (int)TARGET_FLOAT128_ENABLE_TYPE);
2761 if (TARGET_VSX)
2762 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2763 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2765 if (TARGET_DIRECT_MOVE_128)
2766 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2767 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2771 /* Update the addr mask bits in reg_addr to help secondary reload and the
2772 legitimate-address checks figure out the appropriate addressing to
2773 use. */
2775 static void
2776 rs6000_setup_reg_addr_masks (void)
2778 ssize_t rc, reg, m, nregs;
2779 addr_mask_type any_addr_mask, addr_mask;
2781 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2783 machine_mode m2 = (machine_mode) m;
2784 bool complex_p = false;
2785 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2786 size_t msize;
2788 if (COMPLEX_MODE_P (m2))
2790 complex_p = true;
2791 m2 = GET_MODE_INNER (m2);
2794 msize = GET_MODE_SIZE (m2);
2796 /* SDmode is special in that we want to access it only via REG+REG
2797 addressing on power7 and above, since we want to use the LFIWZX and
2798 STFIWZX instructions to load it. */
2799 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2801 any_addr_mask = 0;
2802 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2804 addr_mask = 0;
2805 reg = reload_reg_map[rc].reg;
2807 /* Can mode values go in the GPR/FPR/Altivec registers? */
2808 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2810 bool small_int_vsx_p = (small_int_p
2811 && (rc == RELOAD_REG_FPR
2812 || rc == RELOAD_REG_VMX));
2814 nregs = rs6000_hard_regno_nregs[m][reg];
2815 addr_mask |= RELOAD_REG_VALID;
2817 /* Indicate if the mode takes more than 1 physical register. If
2818 it takes a single register, indicate it can do REG+REG
2819 addressing. Small integers in VSX registers can only do
2820 REG+REG addressing. */
2821 if (small_int_vsx_p)
2822 addr_mask |= RELOAD_REG_INDEXED;
2823 else if (nregs > 1 || m == BLKmode || complex_p)
2824 addr_mask |= RELOAD_REG_MULTIPLE;
2825 else
2826 addr_mask |= RELOAD_REG_INDEXED;
2828 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2829 addressing. Restrict addressing on SPE for 64-bit types
2830 because of the SUBREG hackery used to address 64-bit floats in
2831 '32-bit' GPRs. If we allow scalars into Altivec registers,
2832 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2834 if (TARGET_UPDATE
2835 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2836 && msize <= 8
2837 && !VECTOR_MODE_P (m2)
2838 && !FLOAT128_VECTOR_P (m2)
2839 && !complex_p
2840 && !small_int_vsx_p
2841 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2842 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2843 && !(TARGET_E500_DOUBLE && msize == 8))
2845 addr_mask |= RELOAD_REG_PRE_INCDEC;
2847 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2848 we don't allow PRE_MODIFY for some multi-register
2849 operations. */
2850 switch (m)
2852 default:
2853 addr_mask |= RELOAD_REG_PRE_MODIFY;
2854 break;
2856 case DImode:
2857 if (TARGET_POWERPC64)
2858 addr_mask |= RELOAD_REG_PRE_MODIFY;
2859 break;
2861 case DFmode:
2862 case DDmode:
2863 if (TARGET_DF_INSN)
2864 addr_mask |= RELOAD_REG_PRE_MODIFY;
2865 break;
2870 /* GPR and FPR registers can do REG+OFFSET addressing, except
2871 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2872 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2873 if ((addr_mask != 0) && !indexed_only_p
2874 && msize <= 8
2875 && (rc == RELOAD_REG_GPR
2876 || ((msize == 8 || m2 == SFmode)
2877 && (rc == RELOAD_REG_FPR
2878 || (rc == RELOAD_REG_VMX
2879 && TARGET_P9_DFORM_SCALAR)))))
2880 addr_mask |= RELOAD_REG_OFFSET;
2882 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2883 instructions are enabled. The offset for 128-bit VSX registers is
2884 only 12 bits. While GPRs can handle the full offset range, VSX
2885 registers can only handle the restricted range. */
2886 else if ((addr_mask != 0) && !indexed_only_p
2887 && msize == 16 && TARGET_P9_DFORM_VECTOR
2888 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2889 || (m2 == TImode && TARGET_VSX_TIMODE)))
2891 addr_mask |= RELOAD_REG_OFFSET;
2892 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2893 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2896 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2897 addressing on 128-bit types. */
2898 if (rc == RELOAD_REG_VMX && msize == 16
2899 && (addr_mask & RELOAD_REG_VALID) != 0)
2900 addr_mask |= RELOAD_REG_AND_M16;
2902 reg_addr[m].addr_mask[rc] = addr_mask;
2903 any_addr_mask |= addr_mask;
2906 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
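/* A concrete reading of the loop above (illustrative; the exact bits
   depend on the ISA flags): for DFmode on a power8-style VSX target,
   the GPR and FPR entries typically end up with RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET, while the VMX entry gets
   only RELOAD_REG_VALID and RELOAD_REG_INDEXED, since Altivec
   registers have no D-form (REG+OFFSET) addressing before ISA 3.0.  */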
2911 /* Initialize the various global tables that are based on register size. */
2912 static void
2913 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2915 ssize_t r, m, c;
2916 int align64;
2917 int align32;
2919 /* Precalculate REGNO_REG_CLASS. */
2920 rs6000_regno_regclass[0] = GENERAL_REGS;
2921 for (r = 1; r < 32; ++r)
2922 rs6000_regno_regclass[r] = BASE_REGS;
2924 for (r = 32; r < 64; ++r)
2925 rs6000_regno_regclass[r] = FLOAT_REGS;
2927 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2928 rs6000_regno_regclass[r] = NO_REGS;
2930 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2931 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2933 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2934 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2935 rs6000_regno_regclass[r] = CR_REGS;
2937 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2938 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2939 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2940 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2941 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2942 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2943 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2944 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2945 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2946 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2947 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2948 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2950 /* Precalculate the mapping from register class to the simpler reload register
2951 class. We don't need all of the register classes that are combinations of
2952 different classes, just the simple ones that have constraint letters. */
2953 for (c = 0; c < N_REG_CLASSES; c++)
2954 reg_class_to_reg_type[c] = NO_REG_TYPE;
2956 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2957 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2958 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2959 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2960 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2961 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2962 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2963 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2964 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2965 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2966 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2967 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2969 if (TARGET_VSX)
2971 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2972 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2974 else
2976 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2977 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2980 /* Precalculate the valid memory formats as well as the vector information;
2981 this must be set up before the rs6000_hard_regno_nregs_internal calls
2982 below. */
2983 gcc_assert ((int)VECTOR_NONE == 0);
2984 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2985 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2987 gcc_assert ((int)CODE_FOR_nothing == 0);
2988 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2990 gcc_assert ((int)NO_REGS == 0);
2991 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2993 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2994 controls whether the compiler uses native alignment or still uses 128-bit alignment. */
2995 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2997 align64 = 64;
2998 align32 = 32;
3000 else
3002 align64 = 128;
3003 align32 = 128;
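/* E.g. with TARGET_VSX set and TARGET_VSX_ALIGN_128 clear, align64 ==
   64 and align32 == 32, so V2DFmode below gets 64-bit alignment and
   V4SFmode 32-bit; an Altivec-only configuration keeps both at 128.  */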
3006 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3007 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3008 if (TARGET_FLOAT128_TYPE)
3010 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3011 rs6000_vector_align[KFmode] = 128;
3013 if (FLOAT128_IEEE_P (TFmode))
3015 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3016 rs6000_vector_align[TFmode] = 128;
3020 /* V2DF mode, VSX only. */
3021 if (TARGET_VSX)
3023 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3024 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3025 rs6000_vector_align[V2DFmode] = align64;
3028 /* V4SF mode, either VSX or Altivec. */
3029 if (TARGET_VSX)
3031 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3032 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3033 rs6000_vector_align[V4SFmode] = align32;
3035 else if (TARGET_ALTIVEC)
3037 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3038 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3039 rs6000_vector_align[V4SFmode] = align32;
3042 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3043 and stores. */
3044 if (TARGET_ALTIVEC)
3046 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3047 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3048 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3049 rs6000_vector_align[V4SImode] = align32;
3050 rs6000_vector_align[V8HImode] = align32;
3051 rs6000_vector_align[V16QImode] = align32;
3053 if (TARGET_VSX)
3055 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3056 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3057 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3059 else
3061 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3062 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3063 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3067 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3068 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3069 if (TARGET_VSX)
3071 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3072 rs6000_vector_unit[V2DImode]
3073 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3074 rs6000_vector_align[V2DImode] = align64;
3076 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3077 rs6000_vector_unit[V1TImode]
3078 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3079 rs6000_vector_align[V1TImode] = 128;
3082 /* DFmode, see if we want to use the VSX unit. Memory is handled
3083 differently, so don't set rs6000_vector_mem. */
3084 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3086 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3087 rs6000_vector_align[DFmode] = 64;
3090 /* SFmode, see if we want to use the VSX unit. */
3091 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3093 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3094 rs6000_vector_align[SFmode] = 32;
3097 /* Allow TImode in VSX register and set the VSX memory macros. */
3098 if (TARGET_VSX && TARGET_VSX_TIMODE)
3100 rs6000_vector_mem[TImode] = VECTOR_VSX;
3101 rs6000_vector_align[TImode] = align64;
3104 /* TODO add SPE and paired floating point vector support. */
3106 /* Register class constraints for the constraints that depend on compile-time
3107 switches. When the VSX code was added, different constraints were added
3108 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3109 of the VSX registers are used. The register classes for scalar floating
3110 point types are set based on whether we allow that type into the upper
3111 (Altivec) registers. GCC has register classes to target the Altivec
3112 registers for load/store operations, so it can select a VSX memory
3113 operation instead of the traditional floating point operation. The
3114 constraints are:
3116 d - Register class to use with traditional DFmode instructions.
3117 f - Register class to use with traditional SFmode instructions.
3118 v - Altivec register.
3119 wa - Any VSX register.
3120 wc - Reserved to represent individual CR bits (used in LLVM).
3121 wd - Preferred register class for V2DFmode.
3122 wf - Preferred register class for V4SFmode.
3123 wg - Float register for power6x move insns.
3124 wh - FP register for direct move instructions.
3125 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3126 wj - FP or VSX register to hold 64-bit integers for direct moves.
3127 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3128 wl - Float register if we can do 32-bit signed int loads.
3129 wm - VSX register for ISA 2.07 direct move operations.
3130 wn - always NO_REGS.
3131 wr - GPR if 64-bit mode is permitted.
3132 ws - Register class to do ISA 2.06 DF operations.
3133 wt - VSX register for TImode in VSX registers.
3134 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3135 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3136 ww - Register class to do SF conversions in with VSX operations.
3137 wx - Float register if we can do 32-bit int stores.
3138 wy - Register class to do ISA 2.07 SF operations.
3139 wz - Float register if we can do 32-bit unsigned int loads.
3140 wH - Altivec register if SImode is allowed in VSX registers.
3141 wI - VSX register if SImode is allowed in VSX registers.
3142 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3143 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
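/* Usage sketch (an illustrative fragment, not an actual pattern from
   rs6000.md): an insn operand written as

     (match_operand:DF 0 "vsx_register_operand" "=ws")

   accepts whatever class RS6000_CONSTRAINT_ws is given below, i.e.
   VSX_REGS when DFmode is allowed into the upper (Altivec) registers
   and FLOAT_REGS otherwise.  */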
3145 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3146 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3148 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3149 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3151 if (TARGET_VSX)
3153 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3154 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3155 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3157 if (TARGET_VSX_TIMODE)
3158 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3160 if (TARGET_UPPER_REGS_DF) /* DFmode */
3162 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3163 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3165 else
3166 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3168 if (TARGET_UPPER_REGS_DI) /* DImode */
3169 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3170 else
3171 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3174 /* Add conditional constraints based on various options, to allow us to
3175 collapse multiple insn patterns. */
3176 if (TARGET_ALTIVEC)
3177 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3179 if (TARGET_MFPGPR) /* DFmode */
3180 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3182 if (TARGET_LFIWAX)
3183 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3185 if (TARGET_DIRECT_MOVE)
3187 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3188 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3189 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3190 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3191 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3192 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3195 if (TARGET_POWERPC64)
3196 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3198 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3200 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3201 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3202 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3204 else if (TARGET_P8_VECTOR)
3206 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3207 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3209 else if (TARGET_VSX)
3210 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3212 if (TARGET_STFIWX)
3213 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3215 if (TARGET_LFIWZX)
3216 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3218 if (TARGET_FLOAT128_TYPE)
3220 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3221 if (FLOAT128_IEEE_P (TFmode))
3222 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3225 /* Support for new D-form instructions. */
3226 if (TARGET_P9_DFORM_SCALAR)
3227 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3229 /* Support for ISA 3.0 (power9) vectors. */
3230 if (TARGET_P9_VECTOR)
3231 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3233 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3234 if (TARGET_DIRECT_MOVE_128)
3235 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3237 /* Support small integers in VSX registers. */
3238 if (TARGET_VSX_SMALL_INTEGER)
3240 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3241 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3242 if (TARGET_P9_VECTOR)
3244 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3245 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3249 /* Set up the reload helper and direct move functions. */
3250 if (TARGET_VSX || TARGET_ALTIVEC)
3252 if (TARGET_64BIT)
3254 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3255 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3256 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3257 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3258 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3259 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3260 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3261 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3262 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3263 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3264 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3265 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3266 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3267 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3268 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3269 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3270 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3271 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3272 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3273 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3275 if (FLOAT128_VECTOR_P (KFmode))
3277 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3278 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3281 if (FLOAT128_VECTOR_P (TFmode))
3283 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3284 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3287 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3288 available. */
3289 if (TARGET_NO_SDMODE_STACK)
3291 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3292 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3295 if (TARGET_VSX_TIMODE)
3297 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3298 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3301 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3303 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3304 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3305 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3306 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3307 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3308 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3309 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3310 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3311 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3313 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3314 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3315 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3316 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3317 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3318 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3319 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3320 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3321 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3323 if (FLOAT128_VECTOR_P (KFmode))
3325 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3326 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3329 if (FLOAT128_VECTOR_P (TFmode))
3331 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3332 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3336 else
3338 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3339 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3340 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3341 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3342 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3343 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3344 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3345 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3346 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3347 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3348 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3349 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3350 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3351 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3352 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3353 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3354 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3355 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3356 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3357 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3359 if (FLOAT128_VECTOR_P (KFmode))
3361 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3362 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3365 if (FLOAT128_IEEE_P (TFmode))
3367 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3368 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3371 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3372 available. */
3373 if (TARGET_NO_SDMODE_STACK)
3375 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3376 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3379 if (TARGET_VSX_TIMODE)
3381 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3382 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3385 if (TARGET_DIRECT_MOVE)
3387 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3388 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3389 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3393 if (TARGET_UPPER_REGS_DF)
3394 reg_addr[DFmode].scalar_in_vmx_p = true;
3396 if (TARGET_UPPER_REGS_DI)
3397 reg_addr[DImode].scalar_in_vmx_p = true;
3399 if (TARGET_UPPER_REGS_SF)
3400 reg_addr[SFmode].scalar_in_vmx_p = true;
3402 if (TARGET_VSX_SMALL_INTEGER)
3403 reg_addr[SImode].scalar_in_vmx_p = true;
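/* "Upper regs" above refers to the AltiVec half of the 64 VSX registers
   (vs32..vs63, overlapping the traditional VRs); scalar_in_vmx_p records
   that a scalar of the given mode may live there rather than only in the
   FPRs.  */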
3406 /* Set up the fusion operations. */
3407 if (TARGET_P8_FUSION)
3409 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3410 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3411 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3412 if (TARGET_64BIT)
3413 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3416 if (TARGET_P9_FUSION)
3418 struct fuse_insns {
3419 enum machine_mode mode; /* mode of the fused type. */
3420 enum machine_mode pmode; /* pointer mode. */
3421 enum rs6000_reload_reg_type rtype; /* register type. */
3422 enum insn_code load; /* load insn. */
3423 enum insn_code store; /* store insn. */
3426 static const struct fuse_insns addis_insns[] = {
3427 { SFmode, DImode, RELOAD_REG_FPR,
3428 CODE_FOR_fusion_fpr_di_sf_load,
3429 CODE_FOR_fusion_fpr_di_sf_store },
3431 { SFmode, SImode, RELOAD_REG_FPR,
3432 CODE_FOR_fusion_fpr_si_sf_load,
3433 CODE_FOR_fusion_fpr_si_sf_store },
3435 { DFmode, DImode, RELOAD_REG_FPR,
3436 CODE_FOR_fusion_fpr_di_df_load,
3437 CODE_FOR_fusion_fpr_di_df_store },
3439 { DFmode, SImode, RELOAD_REG_FPR,
3440 CODE_FOR_fusion_fpr_si_df_load,
3441 CODE_FOR_fusion_fpr_si_df_store },
3443 { DImode, DImode, RELOAD_REG_FPR,
3444 CODE_FOR_fusion_fpr_di_di_load,
3445 CODE_FOR_fusion_fpr_di_di_store },
3447 { DImode, SImode, RELOAD_REG_FPR,
3448 CODE_FOR_fusion_fpr_si_di_load,
3449 CODE_FOR_fusion_fpr_si_di_store },
3451 { QImode, DImode, RELOAD_REG_GPR,
3452 CODE_FOR_fusion_gpr_di_qi_load,
3453 CODE_FOR_fusion_gpr_di_qi_store },
3455 { QImode, SImode, RELOAD_REG_GPR,
3456 CODE_FOR_fusion_gpr_si_qi_load,
3457 CODE_FOR_fusion_gpr_si_qi_store },
3459 { HImode, DImode, RELOAD_REG_GPR,
3460 CODE_FOR_fusion_gpr_di_hi_load,
3461 CODE_FOR_fusion_gpr_di_hi_store },
3463 { HImode, SImode, RELOAD_REG_GPR,
3464 CODE_FOR_fusion_gpr_si_hi_load,
3465 CODE_FOR_fusion_gpr_si_hi_store },
3467 { SImode, DImode, RELOAD_REG_GPR,
3468 CODE_FOR_fusion_gpr_di_si_load,
3469 CODE_FOR_fusion_gpr_di_si_store },
3471 { SImode, SImode, RELOAD_REG_GPR,
3472 CODE_FOR_fusion_gpr_si_si_load,
3473 CODE_FOR_fusion_gpr_si_si_store },
3475 { SFmode, DImode, RELOAD_REG_GPR,
3476 CODE_FOR_fusion_gpr_di_sf_load,
3477 CODE_FOR_fusion_gpr_di_sf_store },
3479 { SFmode, SImode, RELOAD_REG_GPR,
3480 CODE_FOR_fusion_gpr_si_sf_load,
3481 CODE_FOR_fusion_gpr_si_sf_store },
3483 { DImode, DImode, RELOAD_REG_GPR,
3484 CODE_FOR_fusion_gpr_di_di_load,
3485 CODE_FOR_fusion_gpr_di_di_store },
3487 { DFmode, DImode, RELOAD_REG_GPR,
3488 CODE_FOR_fusion_gpr_di_df_load,
3489 CODE_FOR_fusion_gpr_di_df_store },
3492 enum machine_mode cur_pmode = Pmode;
3493 size_t i;
3495 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3497 enum machine_mode xmode = addis_insns[i].mode;
3498 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3500 if (addis_insns[i].pmode != cur_pmode)
3501 continue;
3503 if (rtype == RELOAD_REG_FPR
3504 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3505 continue;
3507 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3508 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
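/* For illustration, "fusion" here is the power8/power9 ability to treat
   an addis that forms the high part of an address and the dependent
   memory access as one operation, roughly:

	addis r9,r2,sym@toc@ha
	lwz   r3,sym@toc@l(r9)

   The table above just records, per mode and register type, the named
   patterns that emit such fusable pairs.  */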
3512 /* Note which types we support for fusing a TOC setup plus a memory insn. We
3513 only do fused TOCs for the medium/large code models. */
3514 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3515 && (TARGET_CMODEL != CMODEL_SMALL))
3517 reg_addr[QImode].fused_toc = true;
3518 reg_addr[HImode].fused_toc = true;
3519 reg_addr[SImode].fused_toc = true;
3520 reg_addr[DImode].fused_toc = true;
3521 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3523 if (TARGET_SINGLE_FLOAT)
3524 reg_addr[SFmode].fused_toc = true;
3525 if (TARGET_DOUBLE_FLOAT)
3526 reg_addr[DFmode].fused_toc = true;
3530 /* Precalculate HARD_REGNO_NREGS. */
3531 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3532 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3533 rs6000_hard_regno_nregs[m][r]
3534 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3536 /* Precalculate HARD_REGNO_MODE_OK. */
3537 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3538 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3539 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3540 rs6000_hard_regno_mode_ok_p[m][r] = true;
3542 /* Precalculate CLASS_MAX_NREGS sizes. */
3543 for (c = 0; c < LIM_REG_CLASSES; ++c)
3545 int reg_size;
3547 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3548 reg_size = UNITS_PER_VSX_WORD;
3550 else if (c == ALTIVEC_REGS)
3551 reg_size = UNITS_PER_ALTIVEC_WORD;
3553 else if (c == FLOAT_REGS)
3554 reg_size = UNITS_PER_FP_WORD;
3556 else
3557 reg_size = UNITS_PER_WORD;
3559 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3561 machine_mode m2 = (machine_mode)m;
3562 int reg_size2 = reg_size;
3564 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3565 in VSX. */
3566 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3567 reg_size2 = UNITS_PER_FP_WORD;
3569 rs6000_class_max_nregs[m][c]
3570 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
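/* A worked example of the computation above: V4SImode is 16 bytes, so a
   16-byte VSX register holds it in (16 + 16 - 1) / 16 == 1 register,
   while IBM long double (FLOAT128_2REG_P) is forced back to an 8-byte
   reg_size2 and thus needs (16 + 8 - 1) / 8 == 2 registers, even in the
   VSX register classes.  */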
3574 if (TARGET_E500_DOUBLE)
3575 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3577 /* Calculate which modes to automatically generate code to use the
3578 reciprocal divide and square root instructions. In the future, possibly
3579 automatically generate the instructions even if the user did not specify
3580 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3581 not accurate enough. */
3582 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3583 if (TARGET_FRES)
3584 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3585 if (TARGET_FRE)
3586 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3587 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3588 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3589 if (VECTOR_UNIT_VSX_P (V2DFmode))
3590 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3592 if (TARGET_FRSQRTES)
3593 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3594 if (TARGET_FRSQRTE)
3595 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3596 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3597 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3598 if (VECTOR_UNIT_VSX_P (V2DFmode))
3599 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3601 if (rs6000_recip_control)
3603 if (!flag_finite_math_only)
3604 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3605 if (flag_trapping_math)
3606 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3607 if (!flag_reciprocal_math)
3608 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3609 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3611 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3612 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3613 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3615 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3616 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3617 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3619 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3620 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3621 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3623 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3624 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3625 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3627 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3628 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3629 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3631 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3632 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3633 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3635 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3636 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3637 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3639 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3640 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3641 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
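/* The estimate instructions (fres, fre, xvresp, xvredp, ...) only give a
   few accurate bits, so the generated code refines the guess with
   Newton-Raphson steps, roughly x1 = x0 * (2 - d * x0) for 1/d.  That is
   why the AUTO bits above are only set when the -ffast-math style flags
   permit the rearrangement.  */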
3645 /* Update the addr mask bits in reg_addr to help secondary reload and the
3646 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
3647 use. */
3648 rs6000_setup_reg_addr_masks ();
3650 if (global_init_p || TARGET_DEBUG_TARGET)
3652 if (TARGET_DEBUG_REG)
3653 rs6000_debug_reg_global ();
3655 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3656 fprintf (stderr,
3657 "SImode variable mult cost = %d\n"
3658 "SImode constant mult cost = %d\n"
3659 "SImode short constant mult cost = %d\n"
3660 "DImode multipliciation cost = %d\n"
3661 "SImode division cost = %d\n"
3662 "DImode division cost = %d\n"
3663 "Simple fp operation cost = %d\n"
3664 "DFmode multiplication cost = %d\n"
3665 "SFmode division cost = %d\n"
3666 "DFmode division cost = %d\n"
3667 "cache line size = %d\n"
3668 "l1 cache size = %d\n"
3669 "l2 cache size = %d\n"
3670 "simultaneous prefetches = %d\n"
3671 "\n",
3672 rs6000_cost->mulsi,
3673 rs6000_cost->mulsi_const,
3674 rs6000_cost->mulsi_const9,
3675 rs6000_cost->muldi,
3676 rs6000_cost->divsi,
3677 rs6000_cost->divdi,
3678 rs6000_cost->fp,
3679 rs6000_cost->dmul,
3680 rs6000_cost->sdiv,
3681 rs6000_cost->ddiv,
3682 rs6000_cost->cache_line_size,
3683 rs6000_cost->l1_cache_size,
3684 rs6000_cost->l2_cache_size,
3685 rs6000_cost->simultaneous_prefetches);
3689 #if TARGET_MACHO
3690 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3692 static void
3693 darwin_rs6000_override_options (void)
3695 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3696 off. */
3697 rs6000_altivec_abi = 1;
3698 TARGET_ALTIVEC_VRSAVE = 1;
3699 rs6000_current_abi = ABI_DARWIN;
3701 if (DEFAULT_ABI == ABI_DARWIN
3702 && TARGET_64BIT)
3703 darwin_one_byte_bool = 1;
3705 if (TARGET_64BIT && ! TARGET_POWERPC64)
3707 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3708 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3710 if (flag_mkernel)
3712 rs6000_default_long_calls = 1;
3713 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3716 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3717 AltiVec. */
3718 if (!flag_mkernel && !flag_apple_kext
3719 && TARGET_64BIT
3720 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3721 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3723 /* Unless the user (not the configurer) has explicitly overridden
3724 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3725 G4 unless targeting the kernel. */
3726 if (!flag_mkernel
3727 && !flag_apple_kext
3728 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3729 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3730 && ! global_options_set.x_rs6000_cpu_index)
3732 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3735 #endif
3737 /* If not otherwise specified by a target, make 'long double' equivalent to
3738 'double'. */
3740 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3741 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3742 #endif
3744 /* Return the builtin mask of the various options that could affect which
3745 builtins are available. In the past we used target_flags, but we've run out
3746 of bits, and some options like SPE and PAIRED are no longer in
3747 target_flags. */
3749 HOST_WIDE_INT
3750 rs6000_builtin_mask_calculate (void)
3752 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3753 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3754 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3755 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3756 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3757 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3758 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3759 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3760 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3761 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3762 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3763 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3764 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3765 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3766 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3767 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3768 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3769 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3770 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3771 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3772 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
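/* The resulting mask is checked against each builtin's required mask bits
   when the builtins are registered, so for example a builtin that needs
   RS6000_BTM_ALTIVEC is only made available when TARGET_ALTIVEC
   contributed that bit here.  */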
3775 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3776 to clobber the XER[CA] bit because clobbering that bit without telling
3777 the compiler worked just fine with versions of GCC before GCC 5, and
3778 breaking a lot of older code in ways that are hard to track down is
3779 not such a great idea. */
3781 static rtx_insn *
3782 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3783 vec<const char *> &/*constraints*/,
3784 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3786 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3787 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3788 return NULL;
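/* A minimal example of why this matters: an asm such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   silently modifies XER[CA].  Treating every asm as clobbering CA keeps
   such previously working (pre-GCC 5) code correct without demanding an
   explicit clobber.  */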
3791 /* Override command line options. Mostly we process the processor type and
3792 sometimes adjust other TARGET_ options. */
3794 static bool
3795 rs6000_option_override_internal (bool global_init_p)
3797 bool ret = true;
3798 bool have_cpu = false;
3800 /* The default cpu requested at configure time, if any. */
3801 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3803 HOST_WIDE_INT set_masks;
3804 int cpu_index;
3805 int tune_index;
3806 struct cl_target_option *main_target_opt
3807 = ((global_init_p || target_option_default_node == NULL)
3808 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3810 /* Print defaults. */
3811 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3812 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3814 /* Remember the explicit arguments. */
3815 if (global_init_p)
3816 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3818 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3819 library functions, so warn about it. The flag may be useful for
3820 performance studies from time to time though, so don't disable it
3821 entirely. */
3822 if (global_options_set.x_rs6000_alignment_flags
3823 && rs6000_alignment_flags == MASK_ALIGN_POWER
3824 && DEFAULT_ABI == ABI_DARWIN
3825 && TARGET_64BIT)
3826 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3827 " it is incompatible with the installed C and C++ libraries");
3829 /* Numerous experiments show that IRA-based loop pressure
3830 calculation works better for RTL loop invariant motion on targets
3831 with enough (>= 32) registers. It is an expensive optimization,
3832 so it is on only for peak performance. */
3833 if (optimize >= 3 && global_init_p
3834 && !global_options_set.x_flag_ira_loop_pressure)
3835 flag_ira_loop_pressure = 1;
3837 /* Set the pointer size. */
3838 if (TARGET_64BIT)
3840 rs6000_pmode = (int)DImode;
3841 rs6000_pointer_size = 64;
3843 else
3845 rs6000_pmode = (int)SImode;
3846 rs6000_pointer_size = 32;
3849 /* Some OSs don't support saving the high part of 64-bit registers on context
3850 switch. Other OSs don't support saving Altivec registers. On those OSs,
3851 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3852 if the user wants either, the user must explicitly specify them and we
3853 won't interfere with the user's specification. */
3855 set_masks = POWERPC_MASKS;
3856 #ifdef OS_MISSING_POWERPC64
3857 if (OS_MISSING_POWERPC64)
3858 set_masks &= ~OPTION_MASK_POWERPC64;
3859 #endif
3860 #ifdef OS_MISSING_ALTIVEC
3861 if (OS_MISSING_ALTIVEC)
3862 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3863 #endif
3865 /* Don't override by the processor default if given explicitly. */
3866 set_masks &= ~rs6000_isa_flags_explicit;
3868 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3869 the cpu in a target attribute or pragma, but did not specify a tuning
3870 option, use the cpu for the tuning option rather than the option specified
3871 with -mtune on the command line. Process a '--with-cpu' configuration
3872 request as an implicit -mcpu. */
3873 if (rs6000_cpu_index >= 0)
3875 cpu_index = rs6000_cpu_index;
3876 have_cpu = true;
3878 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3880 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3881 have_cpu = true;
3883 else if (implicit_cpu)
3885 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3886 have_cpu = true;
3888 else
3890 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3891 const char *default_cpu = ((!TARGET_POWERPC64)
3892 ? "powerpc"
3893 : ((BYTES_BIG_ENDIAN)
3894 ? "powerpc64"
3895 : "powerpc64le"));
3897 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3898 have_cpu = false;
3901 gcc_assert (cpu_index >= 0);
3903 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3904 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3905 with those from the cpu, except for options that were explicitly set. If
3906 we don't have a cpu, do not override the target bits set in
3907 TARGET_DEFAULT. */
3908 if (have_cpu)
3910 rs6000_isa_flags &= ~set_masks;
3911 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3912 & set_masks);
3914 else
3916 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3917 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3918 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3919 to using rs6000_isa_flags, we need to do the initialization here.
3921 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3922 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3923 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3924 : processor_target_table[cpu_index].target_enable);
3925 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
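/* Worked example of the masking above: with -mcpu=power8 -mno-vsx, the
   VSX bit is in rs6000_isa_flags_explicit and hence was removed from
   set_masks, so the power8 entry of processor_target_table enables
   everything else it implies while the user's -mno-vsx survives.  */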
3928 if (rs6000_tune_index >= 0)
3929 tune_index = rs6000_tune_index;
3930 else if (have_cpu)
3931 rs6000_tune_index = tune_index = cpu_index;
3932 else
3934 size_t i;
3935 enum processor_type tune_proc
3936 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3938 tune_index = -1;
3939 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3940 if (processor_target_table[i].processor == tune_proc)
3942 rs6000_tune_index = tune_index = i;
3943 break;
3947 gcc_assert (tune_index >= 0);
3948 rs6000_cpu = processor_target_table[tune_index].processor;
3950 /* Pick defaults for SPE related control flags. Do this early to make sure
3951 that the TARGET_ macros are representative ASAP. */
3953 int spe_capable_cpu =
3954 (rs6000_cpu == PROCESSOR_PPC8540
3955 || rs6000_cpu == PROCESSOR_PPC8548);
3957 if (!global_options_set.x_rs6000_spe_abi)
3958 rs6000_spe_abi = spe_capable_cpu;
3960 if (!global_options_set.x_rs6000_spe)
3961 rs6000_spe = spe_capable_cpu;
3963 if (!global_options_set.x_rs6000_float_gprs)
3964 rs6000_float_gprs =
3965 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3966 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3967 : 0);
3970 if (global_options_set.x_rs6000_spe_abi
3971 && rs6000_spe_abi
3972 && !TARGET_SPE_ABI)
3973 error ("not configured for SPE ABI");
3975 if (global_options_set.x_rs6000_spe
3976 && rs6000_spe
3977 && !TARGET_SPE)
3978 error ("not configured for SPE instruction set");
3980 if (main_target_opt != NULL
3981 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3982 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3983 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3984 error ("target attribute or pragma changes SPE ABI");
3986 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3987 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3988 || rs6000_cpu == PROCESSOR_PPCE5500)
3990 if (TARGET_ALTIVEC)
3991 error ("AltiVec not supported in this target");
3992 if (TARGET_SPE)
3993 error ("SPE not supported in this target");
3995 if (rs6000_cpu == PROCESSOR_PPCE6500)
3997 if (TARGET_SPE)
3998 error ("SPE not supported in this target");
4001 /* Disable Cell microcode if we are optimizing for the Cell
4002 and not optimizing for size. */
4003 if (rs6000_gen_cell_microcode == -1)
4004 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4005 && !optimize_size);
4007 /* If we are optimizing big endian systems for space and it's OK to
4008 use instructions that would be microcoded on the Cell, use the
4009 load/store multiple and string instructions. */
4010 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4011 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4012 | OPTION_MASK_STRING);
4014 /* Don't allow -mmultiple or -mstring on little endian systems
4015 unless the cpu is a 750, because the hardware doesn't support the
4016 instructions used in little endian mode, and they cause an alignment
4017 trap. The 750 does not cause an alignment trap (except when the
4018 target is unaligned). */
4020 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4022 if (TARGET_MULTIPLE)
4024 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4025 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4026 warning (0, "-mmultiple is not supported on little endian systems");
4029 if (TARGET_STRING)
4031 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4033 warning (0, "-mstring is not supported on little endian systems");
4037 /* If little-endian, default to -mstrict-align on older processors.
4038 Testing for htm matches power8 and later. */
4039 if (!BYTES_BIG_ENDIAN
4040 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4041 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4043 /* -maltivec={le,be} implies -maltivec. */
4044 if (rs6000_altivec_element_order != 0)
4045 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4047 /* Disallow -maltivec=le in big endian mode for now. This is not
4048 known to be useful for anyone. */
4049 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4051 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4052 rs6000_altivec_element_order = 0;
4055 /* Add some warnings for VSX. */
4056 if (TARGET_VSX)
4058 const char *msg = NULL;
4059 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4060 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4062 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4063 msg = N_("-mvsx requires hardware floating point");
4064 else
4066 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4067 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4070 else if (TARGET_PAIRED_FLOAT)
4071 msg = N_("-mvsx and -mpaired are incompatible");
4072 else if (TARGET_AVOID_XFORM > 0)
4073 msg = N_("-mvsx needs indexed addressing");
4074 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4075 & OPTION_MASK_ALTIVEC))
4077 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4078 msg = N_("-mvsx and -mno-altivec are incompatible");
4079 else
4080 msg = N_("-mno-altivec disables vsx");
4083 if (msg)
4085 warning (0, msg);
4086 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4087 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4091 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4092 the -mcpu setting to enable options that conflict. */
4093 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4094 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4095 | OPTION_MASK_ALTIVEC
4096 | OPTION_MASK_VSX)) != 0)
4097 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4098 | OPTION_MASK_DIRECT_MOVE)
4099 & ~rs6000_isa_flags_explicit);
4101 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4102 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4104 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4105 unless the user explicitly used the -mno-<option> to disable the code. */
4106 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4107 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4108 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4109 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4110 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4111 else if (TARGET_VSX)
4112 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4113 else if (TARGET_POPCNTD)
4114 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4115 else if (TARGET_DFP)
4116 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4117 else if (TARGET_CMPB)
4118 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4119 else if (TARGET_FPRND)
4120 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4121 else if (TARGET_POPCNTB)
4122 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4123 else if (TARGET_ALTIVEC)
4124 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
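/* The if/else-if chain above is cumulative: e.g. a bare -mvsx pulls in
   ISA_2_6_MASKS_SERVER, which itself includes the AltiVec and popcntd era
   options, so each -m<feature> switch drags in the rest of the ISA level
   it first appeared in, minus anything the user explicitly negated.  */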
4126 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4128 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4129 error ("-mcrypto requires -maltivec");
4130 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4133 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4135 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4136 error ("-mdirect-move requires -mvsx");
4137 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4140 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4142 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4143 error ("-mpower8-vector requires -maltivec");
4144 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4147 if (TARGET_P8_VECTOR && !TARGET_VSX)
4149 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4150 error ("-mpower8-vector requires -mvsx");
4151 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4154 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4156 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4157 error ("-mvsx-timode requires -mvsx");
4158 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4161 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4163 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4164 error ("-mhard-dfp requires -mhard-float");
4165 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4168 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4169 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4170 set the individual option. */
4171 if (TARGET_UPPER_REGS > 0)
4173 if (TARGET_VSX
4174 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4176 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4177 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4179 if (TARGET_VSX
4180 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4182 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4183 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4185 if (TARGET_P8_VECTOR
4186 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4188 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4189 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4192 else if (TARGET_UPPER_REGS == 0)
4194 if (TARGET_VSX
4195 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4197 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4198 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4200 if (TARGET_VSX
4201 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4203 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4204 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4206 if (TARGET_P8_VECTOR
4207 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4209 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4210 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4214 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4216 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4217 error ("-mupper-regs-df requires -mvsx");
4218 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4221 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4223 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4224 error ("-mupper-regs-di requires -mvsx");
4225 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4228 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4230 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4231 error ("-mupper-regs-sf requires -mpower8-vector");
4232 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4235 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4236 silently turn off quad memory mode. */
4237 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4239 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4240 warning (0, N_("-mquad-memory requires 64-bit mode"));
4242 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4243 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4245 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4246 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4249 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4250 the words are reversed, but atomic operations can still be done by
4251 swapping the words. */
4252 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4254 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4255 warning (0, N_("-mquad-memory is not available in little endian mode"));
4257 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4260 /* Assume that if the user asked for normal quad memory instructions, they
4261 want the atomic versions as well, unless they explicitly told us not to use
4262 quad word atomic instructions. */
4263 if (TARGET_QUAD_MEMORY
4264 && !TARGET_QUAD_MEMORY_ATOMIC
4265 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4266 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4268 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4269 generating power8 instructions. */
4270 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4271 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4272 & OPTION_MASK_P8_FUSION);
4274 /* Setting additional fusion flags turns on base fusion. */
4275 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4277 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4279 if (TARGET_P8_FUSION_SIGN)
4280 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4282 if (TARGET_TOC_FUSION)
4283 error ("-mtoc-fusion requires -mpower8-fusion");
4285 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4287 else
4288 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4291 /* Power9 fusion is a superset of power8 fusion. */
4292 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4294 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4296 /* We prefer to not mention undocumented options in
4297 error messages. However, if users have managed to select
4298 power9-fusion without selecting power8-fusion, they
4299 already know about undocumented flags. */
4300 error ("-mpower9-fusion requires -mpower8-fusion");
4301 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4303 else
4304 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4307 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4308 generating power9 instructions. */
4309 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4310 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4311 & OPTION_MASK_P9_FUSION);
4313 /* Power8 does not fuse sign-extended loads with the addis. If we are
4314 optimizing at high levels for speed, convert a sign-extended load into a
4315 zero-extending load and an explicit sign extension. */
4316 if (TARGET_P8_FUSION
4317 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4318 && optimize_function_for_speed_p (cfun)
4319 && optimize >= 3)
4320 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
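/* A sketch of the intent (not the exact RTL): an addis/lwa pair cannot
   fuse on power8, so the sign-extending lwa is replaced by a fusable
   zero-extending lwz followed by an extsw of the loaded value.  */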
4322 /* TOC fusion requires 64-bit and medium/large code model. */
4323 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4325 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4326 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4327 warning (0, N_("-mtoc-fusion requires 64-bit"));
4330 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4332 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4333 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4334 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4337 /* Turn on -mtoc-fusion by default if we have p8-fusion, 64-bit, and a
4338 medium/large code model. */
4339 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4340 && (TARGET_CMODEL != CMODEL_SMALL)
4341 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4342 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4344 /* ISA 3.0 vector instructions include ISA 2.07. */
4345 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4347 /* We prefer to not mention undocumented options in
4348 error messages. However, if users have managed to select
4349 power9-vector without selecting power8-vector, they
4350 already know about undocumented flags. */
4351 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4352 error ("-mpower9-vector requires -mpower8-vector");
4353 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4356 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4357 -mpower9-dform-vector. */
4358 if (TARGET_P9_DFORM_BOTH > 0)
4360 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4361 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4363 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4364 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4366 else if (TARGET_P9_DFORM_BOTH == 0)
4368 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4369 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4371 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4372 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4375 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4376 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4378 /* We prefer to not mention undocumented options in
4379 error messages. However, if users have managed to select
4380 power9-dform without selecting power9-vector, they
4381 already know about undocumented flags. */
4382 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4383 error ("-mpower9-dform requires -mpower9-vector");
4384 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4385 | OPTION_MASK_P9_DFORM_VECTOR);
4388 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4390 /* We prefer to not mention undocumented options in
4391 error messages. However, if users have managed to select
4392 power9-dform without selecting upper-regs-df, they
4393 already know about undocumented flags. */
4394 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4395 error ("-mpower9-dform requires -mupper-regs-df");
4396 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4399 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4401 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4402 error ("-mpower9-dform requires -mupper-regs-sf");
4403 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4406 /* Enable LRA by default. */
4407 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4408 rs6000_isa_flags |= OPTION_MASK_LRA;
4410 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4411 but do show up with -mno-lra. Given that -mlra will become the default
4412 once PR 69847 is fixed, turn off the options with problems by default if
4413 -mno-lra was used, and warn if the user explicitly asked for the option.
4415 Enable -mpower9-dform-vector by default if we have LRA and the other
4416 power9 options. Enable -mvsx-timode by default if we have LRA and VSX. */
4417 if (!TARGET_LRA)
4419 if (TARGET_VSX_TIMODE)
4421 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4422 warning (0, "-mvsx-timode might need -mlra");
4424 else
4425 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4429 else
4431 if (TARGET_VSX && !TARGET_VSX_TIMODE
4432 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4433 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4436 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4437 support. If we only have ISA 2.06 support and the user did not specify
4438 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4439 but we don't enable the full vectorization support. */
4440 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4441 TARGET_ALLOW_MOVMISALIGN = 1;
4443 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4445 if (TARGET_ALLOW_MOVMISALIGN > 0
4446 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4447 error ("-mallow-movmisalign requires -mvsx");
4449 TARGET_ALLOW_MOVMISALIGN = 0;
4452 /* Determine when unaligned vector accesses are permitted, and when
4453 they are preferred over masked Altivec loads. Note that if
4454 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4455 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4456 not true. */
4457 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4459 if (!TARGET_VSX)
4461 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4462 error ("-mefficient-unaligned-vsx requires -mvsx");
4464 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4467 else if (!TARGET_ALLOW_MOVMISALIGN)
4469 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4470 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4472 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4476 /* Check whether we should allow small integers into VSX registers. We
4477 require direct move to prevent the register allocator from having to move
4478 variables through memory. SImode can be used on ISA 2.07,
4479 while HImode and QImode require ISA 3.0. */
4480 if (TARGET_VSX_SMALL_INTEGER
4481 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4483 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4484 error ("-mvsx-small-integer requires -mpower8-vector, "
4485 "-mupper-regs-di, and -mdirect-move");
4487 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4490 /* Set long double size before the IEEE 128-bit tests. */
4491 if (!global_options_set.x_rs6000_long_double_type_size)
4493 if (main_target_opt != NULL
4494 && (main_target_opt->x_rs6000_long_double_type_size
4495 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4496 error ("target attribute or pragma changes long double size");
4497 else
4498 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4501 /* Set -mabi=ieeelongdouble on some old targets. Note that AIX and Darwin
4502 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4503 pick up this default. */
4504 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4505 if (!global_options_set.x_rs6000_ieeequad)
4506 rs6000_ieeequad = 1;
4507 #endif
4509 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4510 systems, but don't enable the __float128 keyword. */
4511 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4512 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4513 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4514 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4516 /* IEEE 128-bit floating point requires VSX support. */
4517 if (!TARGET_VSX)
4519 if (TARGET_FLOAT128_KEYWORD)
4521 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4522 error ("-mfloat128 requires VSX support");
4524 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4525 | OPTION_MASK_FLOAT128_KEYWORD
4526 | OPTION_MASK_FLOAT128_HW);
4529 else if (TARGET_FLOAT128_TYPE)
4531 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4532 error ("-mfloat128-type requires VSX support");
4534 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4535 | OPTION_MASK_FLOAT128_KEYWORD
4536 | OPTION_MASK_FLOAT128_HW);
4540 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4541 128-bit floating point support to be enabled. */
4542 if (!TARGET_FLOAT128_TYPE)
4544 if (TARGET_FLOAT128_KEYWORD)
4546 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4548 error ("-mfloat128 requires -mfloat128-type");
4549 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4550 | OPTION_MASK_FLOAT128_KEYWORD
4551 | OPTION_MASK_FLOAT128_HW);
4553 else
4554 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4557 if (TARGET_FLOAT128_HW)
4559 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4561 error ("-mfloat128-hardware requires -mfloat128-type");
4562 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4564 else
4565 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4566 | OPTION_MASK_FLOAT128_KEYWORD
4567 | OPTION_MASK_FLOAT128_HW);
4571 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4572 -mfloat128-hardware by default. However, don't enable the __float128
4573 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4574 -mfloat128 option as well if it was not already set. */
4575 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4576 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4577 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4578 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4580 if (TARGET_FLOAT128_HW
4581 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4583 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4584 error ("-mfloat128-hardware requires full ISA 3.0 support");
4586 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4589 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4590 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4591 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4592 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4594 /* Print the options after updating the defaults. */
4595 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4596 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4598 /* E500mc does "better" if we inline more aggressively. Respect the
4599 user's opinion, though. */
4600 if (rs6000_block_move_inline_limit == 0
4601 && (rs6000_cpu == PROCESSOR_PPCE500MC
4602 || rs6000_cpu == PROCESSOR_PPCE500MC64
4603 || rs6000_cpu == PROCESSOR_PPCE5500
4604 || rs6000_cpu == PROCESSOR_PPCE6500))
4605 rs6000_block_move_inline_limit = 128;
4607 /* store_one_arg depends on expand_block_move to handle at least the
4608 size of reg_parm_stack_space. */
4609 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4610 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4612 if (global_init_p)
4614 /* If the appropriate debug option is enabled, replace the target hooks
4615 with debug versions that call the real version and then print
4616 debugging information. */
4617 if (TARGET_DEBUG_COST)
4619 targetm.rtx_costs = rs6000_debug_rtx_costs;
4620 targetm.address_cost = rs6000_debug_address_cost;
4621 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4624 if (TARGET_DEBUG_ADDR)
4626 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4627 targetm.legitimize_address = rs6000_debug_legitimize_address;
4628 rs6000_secondary_reload_class_ptr
4629 = rs6000_debug_secondary_reload_class;
4630 rs6000_secondary_memory_needed_ptr
4631 = rs6000_debug_secondary_memory_needed;
4632 rs6000_cannot_change_mode_class_ptr
4633 = rs6000_debug_cannot_change_mode_class;
4634 rs6000_preferred_reload_class_ptr
4635 = rs6000_debug_preferred_reload_class;
4636 rs6000_legitimize_reload_address_ptr
4637 = rs6000_debug_legitimize_reload_address;
4638 rs6000_mode_dependent_address_ptr
4639 = rs6000_debug_mode_dependent_address;
4642 if (rs6000_veclibabi_name)
4644 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4645 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4646 else
4648 error ("unknown vectorization library ABI type (%s) for "
4649 "-mveclibabi= switch", rs6000_veclibabi_name);
4650 ret = false;
4655 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4656 target attribute or pragma which automatically enables both options,
4657 unless the altivec ABI was set. This is set by default for 64-bit, but
4658 not for 32-bit. */
4659 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4660 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4661 | OPTION_MASK_FLOAT128_TYPE
4662 | OPTION_MASK_FLOAT128_KEYWORD)
4663 & ~rs6000_isa_flags_explicit);
4665 /* Enable Altivec ABI for AIX -maltivec. */
4666 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4668 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4669 error ("target attribute or pragma changes AltiVec ABI");
4670 else
4671 rs6000_altivec_abi = 1;
4674 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4675 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4676 be explicitly overridden in either case. */
4677 if (TARGET_ELF)
4679 if (!global_options_set.x_rs6000_altivec_abi
4680 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4682 if (main_target_opt != NULL &&
4683 !main_target_opt->x_rs6000_altivec_abi)
4684 error ("target attribute or pragma changes AltiVec ABI");
4685 else
4686 rs6000_altivec_abi = 1;
4690 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4691 So far, the only darwin64 targets are also Mach-O. */
4692 if (TARGET_MACHO
4693 && DEFAULT_ABI == ABI_DARWIN
4694 && TARGET_64BIT)
4696 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4697 error ("target attribute or pragma changes darwin64 ABI");
4698 else
4700 rs6000_darwin64_abi = 1;
4701 /* Default to natural alignment, for better performance. */
4702 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4706 /* Place FP constants in the constant pool instead of TOC
4707 if section anchors are enabled. */
4708 if (flag_section_anchors
4709 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4710 TARGET_NO_FP_IN_TOC = 1;
4712 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4713 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4715 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4716 SUBTARGET_OVERRIDE_OPTIONS;
4717 #endif
4718 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4719 SUBSUBTARGET_OVERRIDE_OPTIONS;
4720 #endif
4721 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4722 SUB3TARGET_OVERRIDE_OPTIONS;
4723 #endif
4725 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4726 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4728 /* For the E500 family of cores, reset the single/double FP flags to let us
4729 check that they remain constant across attributes or pragmas. Also,
4730 clear a possible request for string instructions, which are not supported
4731 and which we might have silently enabled above for -Os.
4733 For other families, clear ISEL in case it was set implicitly.
4736 switch (rs6000_cpu)
4738 case PROCESSOR_PPC8540:
4739 case PROCESSOR_PPC8548:
4740 case PROCESSOR_PPCE500MC:
4741 case PROCESSOR_PPCE500MC64:
4742 case PROCESSOR_PPCE5500:
4743 case PROCESSOR_PPCE6500:
4745 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4746 rs6000_double_float = TARGET_E500_DOUBLE;
4748 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4750 break;
4752 default:
4754 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4755 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4757 break;
4760 if (main_target_opt)
4762 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4763 error ("target attribute or pragma changes single precision floating "
4764 "point");
4765 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4766 error ("target attribute or pragma changes double precision floating "
4767 "point");
4770 /* Detect invalid option combinations with E500. */
4771 CHECK_E500_OPTIONS;
4773 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4774 && rs6000_cpu != PROCESSOR_POWER5
4775 && rs6000_cpu != PROCESSOR_POWER6
4776 && rs6000_cpu != PROCESSOR_POWER7
4777 && rs6000_cpu != PROCESSOR_POWER8
4778 && rs6000_cpu != PROCESSOR_POWER9
4779 && rs6000_cpu != PROCESSOR_PPCA2
4780 && rs6000_cpu != PROCESSOR_CELL
4781 && rs6000_cpu != PROCESSOR_PPC476);
4782 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4783 || rs6000_cpu == PROCESSOR_POWER5
4784 || rs6000_cpu == PROCESSOR_POWER7
4785 || rs6000_cpu == PROCESSOR_POWER8);
4786 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4787 || rs6000_cpu == PROCESSOR_POWER5
4788 || rs6000_cpu == PROCESSOR_POWER6
4789 || rs6000_cpu == PROCESSOR_POWER7
4790 || rs6000_cpu == PROCESSOR_POWER8
4791 || rs6000_cpu == PROCESSOR_POWER9
4792 || rs6000_cpu == PROCESSOR_PPCE500MC
4793 || rs6000_cpu == PROCESSOR_PPCE500MC64
4794 || rs6000_cpu == PROCESSOR_PPCE5500
4795 || rs6000_cpu == PROCESSOR_PPCE6500);
4797 /* Allow debug switches to override the above settings. These are set to -1
4798 in rs6000.opt to indicate the user hasn't directly set the switch. */
4799 if (TARGET_ALWAYS_HINT >= 0)
4800 rs6000_always_hint = TARGET_ALWAYS_HINT;
4802 if (TARGET_SCHED_GROUPS >= 0)
4803 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4805 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4806 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4808 rs6000_sched_restricted_insns_priority
4809 = (rs6000_sched_groups ? 1 : 0);
4811 /* Handle -msched-costly-dep option. */
4812 rs6000_sched_costly_dep
4813 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4815 if (rs6000_sched_costly_dep_str)
4817 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4818 rs6000_sched_costly_dep = no_dep_costly;
4819 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4820 rs6000_sched_costly_dep = all_deps_costly;
4821 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4822 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4823 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4824 rs6000_sched_costly_dep = store_to_load_dep_costly;
4825 else
4826 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4827 atoi (rs6000_sched_costly_dep_str));
4830 /* Handle -minsert-sched-nops option. */
4831 rs6000_sched_insert_nops
4832 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4834 if (rs6000_sched_insert_nops_str)
4836 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4837 rs6000_sched_insert_nops = sched_finish_none;
4838 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4839 rs6000_sched_insert_nops = sched_finish_pad_groups;
4840 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4841 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4842 else
4843 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4844 atoi (rs6000_sched_insert_nops_str));
4847 if (global_init_p)
4849 #ifdef TARGET_REGNAMES
4850 /* If the user desires alternate register names, copy in the
4851 alternate names now. */
4852 if (TARGET_REGNAMES)
4853 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4854 #endif
4856 /* Set aix_struct_return last, after the ABI is determined.
4857 If -maix-struct-return or -msvr4-struct-return was explicitly
4858 used, don't override with the ABI default. */
4859 if (!global_options_set.x_aix_struct_return)
4860 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4862 #if 0
4863 /* IBM XL compiler defaults to unsigned bitfields. */
4864 if (TARGET_XL_COMPAT)
4865 flag_signed_bitfields = 0;
4866 #endif
4868 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4869 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4871 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4873 /* We can only guarantee the availability of DI pseudo-ops when
4874 assembling for 64-bit targets. */
4875 if (!TARGET_64BIT)
4877 targetm.asm_out.aligned_op.di = NULL;
4878 targetm.asm_out.unaligned_op.di = NULL;
4882 /* Set branch target alignment, if not optimizing for size. */
4883 if (!optimize_size)
4885 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4886 aligned 8-byte to avoid misprediction by the branch predictor. */
4887 if (rs6000_cpu == PROCESSOR_TITAN
4888 || rs6000_cpu == PROCESSOR_CELL)
4890 if (align_functions <= 0)
4891 align_functions = 8;
4892 if (align_jumps <= 0)
4893 align_jumps = 8;
4894 if (align_loops <= 0)
4895 align_loops = 8;
4897 if (rs6000_align_branch_targets)
4899 if (align_functions <= 0)
4900 align_functions = 16;
4901 if (align_jumps <= 0)
4902 align_jumps = 16;
4903 if (align_loops <= 0)
4905 can_override_loop_align = 1;
4906 align_loops = 16;
4909 if (align_jumps_max_skip <= 0)
4910 align_jumps_max_skip = 15;
4911 if (align_loops_max_skip <= 0)
4912 align_loops_max_skip = 15;
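/* The max_skip values are alignment - 1: aligning to a 16-byte boundary
   never needs more than 15 bytes of padding, so a skip limit of 15 makes
   the alignment unconditional, while a smaller limit would drop the
   alignment whenever it cost too much padding.  */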
4915 /* Arrange to save and restore machine status around nested functions. */
4916 init_machine_status = rs6000_init_machine_status;
4918 /* We should always be splitting complex arguments, but we can't break
4919 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4920 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4921 targetm.calls.split_complex_arg = NULL;
4923 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4924 if (DEFAULT_ABI == ABI_AIX)
4925 targetm.calls.custom_function_descriptors = 0;
4928 /* Initialize rs6000_cost with the appropriate target costs. */
4929 if (optimize_size)
4930 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4931 else
4932 switch (rs6000_cpu)
4934 case PROCESSOR_RS64A:
4935 rs6000_cost = &rs64a_cost;
4936 break;
4938 case PROCESSOR_MPCCORE:
4939 rs6000_cost = &mpccore_cost;
4940 break;
4942 case PROCESSOR_PPC403:
4943 rs6000_cost = &ppc403_cost;
4944 break;
4946 case PROCESSOR_PPC405:
4947 rs6000_cost = &ppc405_cost;
4948 break;
4950 case PROCESSOR_PPC440:
4951 rs6000_cost = &ppc440_cost;
4952 break;
4954 case PROCESSOR_PPC476:
4955 rs6000_cost = &ppc476_cost;
4956 break;
4958 case PROCESSOR_PPC601:
4959 rs6000_cost = &ppc601_cost;
4960 break;
4962 case PROCESSOR_PPC603:
4963 rs6000_cost = &ppc603_cost;
4964 break;
4966 case PROCESSOR_PPC604:
4967 rs6000_cost = &ppc604_cost;
4968 break;
4970 case PROCESSOR_PPC604e:
4971 rs6000_cost = &ppc604e_cost;
4972 break;
4974 case PROCESSOR_PPC620:
4975 rs6000_cost = &ppc620_cost;
4976 break;
4978 case PROCESSOR_PPC630:
4979 rs6000_cost = &ppc630_cost;
4980 break;
4982 case PROCESSOR_CELL:
4983 rs6000_cost = &ppccell_cost;
4984 break;
4986 case PROCESSOR_PPC750:
4987 case PROCESSOR_PPC7400:
4988 rs6000_cost = &ppc750_cost;
4989 break;
4991 case PROCESSOR_PPC7450:
4992 rs6000_cost = &ppc7450_cost;
4993 break;
4995 case PROCESSOR_PPC8540:
4996 case PROCESSOR_PPC8548:
4997 rs6000_cost = &ppc8540_cost;
4998 break;
5000 case PROCESSOR_PPCE300C2:
5001 case PROCESSOR_PPCE300C3:
5002 rs6000_cost = &ppce300c2c3_cost;
5003 break;
5005 case PROCESSOR_PPCE500MC:
5006 rs6000_cost = &ppce500mc_cost;
5007 break;
5009 case PROCESSOR_PPCE500MC64:
5010 rs6000_cost = &ppce500mc64_cost;
5011 break;
5013 case PROCESSOR_PPCE5500:
5014 rs6000_cost = &ppce5500_cost;
5015 break;
5017 case PROCESSOR_PPCE6500:
5018 rs6000_cost = &ppce6500_cost;
5019 break;
5021 case PROCESSOR_TITAN:
5022 rs6000_cost = &titan_cost;
5023 break;
5025 case PROCESSOR_POWER4:
5026 case PROCESSOR_POWER5:
5027 rs6000_cost = &power4_cost;
5028 break;
5030 case PROCESSOR_POWER6:
5031 rs6000_cost = &power6_cost;
5032 break;
5034 case PROCESSOR_POWER7:
5035 rs6000_cost = &power7_cost;
5036 break;
5038 case PROCESSOR_POWER8:
5039 rs6000_cost = &power8_cost;
5040 break;
5042 case PROCESSOR_POWER9:
5043 rs6000_cost = &power9_cost;
5044 break;
5046 case PROCESSOR_PPCA2:
5047 rs6000_cost = &ppca2_cost;
5048 break;
5050 default:
5051 gcc_unreachable ();
5054 if (global_init_p)
5056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5057 rs6000_cost->simultaneous_prefetches,
5058 global_options.x_param_values,
5059 global_options_set.x_param_values);
5060 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5061 global_options.x_param_values,
5062 global_options_set.x_param_values);
5063 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5064 rs6000_cost->cache_line_size,
5065 global_options.x_param_values,
5066 global_options_set.x_param_values);
5067 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5068 global_options.x_param_values,
5069 global_options_set.x_param_values);
5071 /* Increase loop peeling limits based on performance analysis. */
5072 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5073 global_options.x_param_values,
5074 global_options_set.x_param_values);
5075 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5076 global_options.x_param_values,
5077 global_options_set.x_param_values);
5079 /* If using typedef char *va_list, signal that
5080 __builtin_va_start (&ap, 0) can be optimized to
5081 ap = __builtin_next_arg (0). */
5082 if (DEFAULT_ABI != ABI_V4)
5083 targetm.expand_builtin_va_start = NULL;
5086 /* Set up single/double float flags.
5087 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5088 then set both flags. */
5089 if (TARGET_HARD_FLOAT && TARGET_FPRS
5090 && rs6000_single_float == 0 && rs6000_double_float == 0)
5091 rs6000_single_float = rs6000_double_float = 1;
5093 /* If not explicitly specified via option, decide whether to generate indexed
5094 load/store instructions. */
5095 if (TARGET_AVOID_XFORM == -1)
5096 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5097 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
5098 need indexed accesses and the type used is the scalar type of the element
5099 being loaded or stored. */
5100 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5101 && !TARGET_ALTIVEC);
5103 /* Set the -mrecip options. */
5104 if (rs6000_recip_name)
5106 char *p = ASTRDUP (rs6000_recip_name);
5107 char *q;
5108 unsigned int mask, i;
5109 bool invert;
5111 while ((q = strtok (p, ",")) != NULL)
5113 p = NULL;
5114 if (*q == '!')
5116 invert = true;
5117 q++;
5119 else
5120 invert = false;
5122 if (!strcmp (q, "default"))
5123 mask = ((TARGET_RECIP_PRECISION)
5124 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5125 else
5127 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5128 if (!strcmp (q, recip_options[i].string))
5130 mask = recip_options[i].mask;
5131 break;
5134 if (i == ARRAY_SIZE (recip_options))
5136 error ("unknown option for -mrecip=%s", q);
5137 invert = false;
5138 mask = 0;
5139 ret = false;
5143 if (invert)
5144 rs6000_recip_control &= ~mask;
5145 else
5146 rs6000_recip_control |= mask;
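
/* A minimal standalone sketch of the comma-separated, "!"-invertible
   option parsing used above; illustrative only, not part of GCC.  The
   keywords and masks below are hypothetical.  */
#if 0
#include <stdbool.h>
#include <string.h>

static unsigned int
parse_mask_list (char *list, unsigned int control)
{
  char *p = list;
  char *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      bool invert = false;

      p = NULL;		/* Subsequent strtok calls continue the scan.  */
      if (*q == '!')
	{
	  invert = true;
	  q++;
	}

      /* Hypothetical one-bit-per-keyword masks.  */
      unsigned int mask = (!strcmp (q, "div") ? 1
			   : !strcmp (q, "sqrt") ? 2 : 0);

      if (invert)
	control &= ~mask;
      else
	control |= mask;
    }
  return control;
}
#endif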
5150 /* Set the builtin mask of the various options used that could affect which
5151 builtins are enabled. In the past we used target_flags, but we've run out
5152 of bits, and some options like SPE and PAIRED are no longer in
5153 target_flags. */
5154 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5155 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5156 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5157 rs6000_builtin_mask);
5159 /* Initialize all of the registers. */
5160 rs6000_init_hard_regno_mode_ok (global_init_p);
5162 /* Save the initial options in case the user does function-specific options. */
5163 if (global_init_p)
5164 target_option_default_node = target_option_current_node
5165 = build_target_option_node (&global_options);
5167 /* If not explicitly specified via option, decide whether to generate the
5168 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5169 if (TARGET_LINK_STACK == -1)
5170 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5172 return ret;
5175 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5176 define the target cpu type. */
5178 static void
5179 rs6000_option_override (void)
5181 (void) rs6000_option_override_internal (true);
5183 /* Register machine-specific passes. This needs to be done at start-up.
5184 It's convenient to do it here (like i386 does). */
5185 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5187 struct register_pass_info analyze_swaps_info
5188 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5190 register_pass (&analyze_swaps_info);
5194 /* Implement targetm.vectorize.builtin_mask_for_load. */
5195 static tree
5196 rs6000_builtin_mask_for_load (void)
5198 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5199 if ((TARGET_ALTIVEC && !TARGET_VSX)
5200 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5201 return altivec_builtin_mask_for_load;
5202 else
5203 return 0;
5206 /* Implement LOOP_ALIGN. */
5207 static int
5208 rs6000_loop_align (rtx label)
5210 basic_block bb;
5211 int ninsns;
5213 /* Don't override loop alignment if -falign-loops was specified. */
5214 if (!can_override_loop_align)
5215 return align_loops_log;
5217 bb = BLOCK_FOR_INSN (label);
5218 ninsns = num_loop_insns (bb->loop_father);
5220 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5221 if (ninsns > 4 && ninsns <= 8
5222 && (rs6000_cpu == PROCESSOR_POWER4
5223 || rs6000_cpu == PROCESSOR_POWER5
5224 || rs6000_cpu == PROCESSOR_POWER6
5225 || rs6000_cpu == PROCESSOR_POWER7
5226 || rs6000_cpu == PROCESSOR_POWER8
5227 || rs6000_cpu == PROCESSOR_POWER9))
5228 return 5;
5229 else
5230 return align_loops_log;
5233 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5234 static int
5235 rs6000_loop_align_max_skip (rtx_insn *label)
5237 return (1 << rs6000_loop_align (label)) - 1;
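
/* Illustrative only, not part of GCC: the relationship between the log2
   alignments returned above and the byte counts they imply.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int align_log = 5;	/* rs6000_loop_align's value for small loops.  */

  printf ("alignment = %d bytes\n", 1 << align_log);	     /* 32 */
  printf ("max skip  = %d bytes\n", (1 << align_log) - 1);   /* 31 */
  return 0;
}
#endif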
5240 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5241 after applying N iterations. This routine does not determine how many
5242 iterations are required to reach the desired alignment. */
5244 static bool
5245 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5247 if (is_packed)
5248 return false;
5250 if (TARGET_32BIT)
5252 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5253 return true;
5255 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5256 return true;
5258 return false;
5260 else
5262 if (TARGET_MACHO)
5263 return false;
5265 /* Assume that all other types are naturally aligned. CHECKME! */
5266 return true;
5270 /* Return true if the vector misalignment factor is supported by the
5271 target. */
5272 static bool
5273 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5274 const_tree type,
5275 int misalignment,
5276 bool is_packed)
5278 if (TARGET_VSX)
5280 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5281 return true;
5283 /* Return false if the movmisalign pattern is not supported for this mode. */
5284 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5285 return false;
5287 if (misalignment == -1)
5289 /* Misalignment factor is unknown at compile time but we know
5290 it's word aligned. */
5291 if (rs6000_vector_alignment_reachable (type, is_packed))
5293 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5295 if (element_size == 64 || element_size == 32)
5296 return true;
5299 return false;
5302 /* VSX supports word-aligned vectors. */
5303 if (misalignment % 4 == 0)
5304 return true;
5306 return false;
5309 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5310 static int
5311 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5312 tree vectype, int misalign)
5314 unsigned elements;
5315 tree elem_type;
5317 switch (type_of_cost)
5319 case scalar_stmt:
5320 case scalar_load:
5321 case scalar_store:
5322 case vector_stmt:
5323 case vector_load:
5324 case vector_store:
5325 case vec_to_scalar:
5326 case scalar_to_vec:
5327 case cond_branch_not_taken:
5328 return 1;
5330 case vec_perm:
5331 if (TARGET_VSX)
5332 return 3;
5333 else
5334 return 1;
5336 case vec_promote_demote:
5337 if (TARGET_VSX)
5338 return 4;
5339 else
5340 return 1;
5342 case cond_branch_taken:
5343 return 3;
5345 case unaligned_load:
5346 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5347 return 1;
5349 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5351 elements = TYPE_VECTOR_SUBPARTS (vectype);
5352 if (elements == 2)
5353 /* Double word aligned. */
5354 return 2;
5356 if (elements == 4)
5358 switch (misalign)
5360 case 8:
5361 /* Double word aligned. */
5362 return 2;
5364 case -1:
5365 /* Unknown misalignment. */
5366 case 4:
5367 case 12:
5368 /* Word aligned. */
5369 return 22;
5371 default:
5372 gcc_unreachable ();
5377 if (TARGET_ALTIVEC)
5378 /* Misaligned loads are not supported. */
5379 gcc_unreachable ();
5381 return 2;
5383 case unaligned_store:
5384 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5385 return 1;
5387 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5389 elements = TYPE_VECTOR_SUBPARTS (vectype);
5390 if (elements == 2)
5391 /* Double word aligned. */
5392 return 2;
5394 if (elements == 4)
5396 switch (misalign)
5398 case 8:
5399 /* Double word aligned. */
5400 return 2;
5402 case -1:
5403 /* Unknown misalignment. */
5404 case 4:
5405 case 12:
5406 /* Word aligned. */
5407 return 23;
5409 default:
5410 gcc_unreachable ();
5415 if (TARGET_ALTIVEC)
5416 /* Misaligned stores are not supported. */
5417 gcc_unreachable ();
5419 return 2;
5421 case vec_construct:
5422 /* This is a rough approximation assuming non-constant elements
5423 constructed into a vector via element insertion. FIXME:
5424 vec_construct is not granular enough for uniformly good
5425 decisions. If the initialization is a splat, this is
5426 cheaper than we estimate. Improve this someday. */
5427 elem_type = TREE_TYPE (vectype);
5428 /* 32-bit vectors loaded into registers are stored as double
5429 precision, so we need 2 permutes, 2 converts, and 1 merge
5430 to construct a vector of short floats from them. */
5431 if (SCALAR_FLOAT_TYPE_P (elem_type)
5432 && TYPE_PRECISION (elem_type) == 32)
5433 return 5;
5434 else
5435 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5437 default:
5438 gcc_unreachable ();
5442 /* Implement targetm.vectorize.preferred_simd_mode. */
5444 static machine_mode
5445 rs6000_preferred_simd_mode (machine_mode mode)
5447 if (TARGET_VSX)
5448 switch (mode)
5450 case DFmode:
5451 return V2DFmode;
5452 default:;
5454 if (TARGET_ALTIVEC || TARGET_VSX)
5455 switch (mode)
5457 case SFmode:
5458 return V4SFmode;
5459 case TImode:
5460 return V1TImode;
5461 case DImode:
5462 return V2DImode;
5463 case SImode:
5464 return V4SImode;
5465 case HImode:
5466 return V8HImode;
5467 case QImode:
5468 return V16QImode;
5469 default:;
5471 if (TARGET_SPE)
5472 switch (mode)
5474 case SFmode:
5475 return V2SFmode;
5476 case SImode:
5477 return V2SImode;
5478 default:;
5480 if (TARGET_PAIRED_FLOAT
5481 && mode == SFmode)
5482 return V2SFmode;
5483 return word_mode;
5486 typedef struct _rs6000_cost_data
5488 struct loop *loop_info;
5489 unsigned cost[3];
5490 } rs6000_cost_data;
5492 /* Test for likely overcommitment of vector hardware resources. If a
5493 loop iteration is relatively large, and too large a percentage of
5494 instructions in the loop are vectorized, the cost model may not
5495 adequately reflect delays from unavailable vector resources.
5496 Penalize the loop body cost for this case. */
5498 static void
5499 rs6000_density_test (rs6000_cost_data *data)
5501 const int DENSITY_PCT_THRESHOLD = 85;
5502 const int DENSITY_SIZE_THRESHOLD = 70;
5503 const int DENSITY_PENALTY = 10;
5504 struct loop *loop = data->loop_info;
5505 basic_block *bbs = get_loop_body (loop);
5506 int nbbs = loop->num_nodes;
5507 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5508 int i, density_pct;
5510 for (i = 0; i < nbbs; i++)
5512 basic_block bb = bbs[i];
5513 gimple_stmt_iterator gsi;
5515 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5517 gimple *stmt = gsi_stmt (gsi);
5518 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5520 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5521 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5522 not_vec_cost++;
5526 free (bbs);
5527 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5529 if (density_pct > DENSITY_PCT_THRESHOLD
5530 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5532 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_NOTE, vect_location,
5535 "density %d%%, cost %d exceeds threshold, penalizing "
5536 "loop body cost by %d%%", density_pct,
5537 vec_cost + not_vec_cost, DENSITY_PENALTY);
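
/* A worked example of the density heuristic above; illustrative only,
   not part of GCC.  With 90 vectorized cost units out of 95 total, the
   density is 94% (> 85%) and the size 95 (> 70), so the body cost is
   scaled by (100 + 10) / 100.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int vec_cost = 90, not_vec_cost = 5;
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > 85 && vec_cost + not_vec_cost > 70)
    vec_cost = vec_cost * (100 + 10) / 100;	/* 90 -> 99 */
  printf ("density %d%%, penalized body cost %d\n", density_pct, vec_cost);
  return 0;
}
#endif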
5541 /* Implement targetm.vectorize.init_cost. */
5543 static void *
5544 rs6000_init_cost (struct loop *loop_info)
5546 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5547 data->loop_info = loop_info;
5548 data->cost[vect_prologue] = 0;
5549 data->cost[vect_body] = 0;
5550 data->cost[vect_epilogue] = 0;
5551 return data;
5554 /* Implement targetm.vectorize.add_stmt_cost. */
5556 static unsigned
5557 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5558 struct _stmt_vec_info *stmt_info, int misalign,
5559 enum vect_cost_model_location where)
5561 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5562 unsigned retval = 0;
5564 if (flag_vect_cost_model)
5566 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5567 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5568 misalign);
5569 /* Statements in an inner loop relative to the loop being
5570 vectorized are weighted more heavily. The value here is
5571 arbitrary and could potentially be improved with analysis. */
5572 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5573 count *= 50; /* FIXME. */
5575 retval = (unsigned) (count * stmt_cost);
5576 cost_data->cost[where] += retval;
5579 return retval;
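
/* Illustrative only, not part of GCC: the inner-loop weighting applied
   above.  Two occurrences of a cost-3 statement inside an inner loop
   accumulate 2 * 50 * 3 = 300 units.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int count = 2, stmt_cost = 3, in_inner_loop = 1;

  if (in_inner_loop)
    count *= 50;	/* Arbitrary heavy weight; see FIXME above.  */
  printf ("retval = %d\n", count * stmt_cost);	/* 300 */
  return 0;
}
#endif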
5582 /* Implement targetm.vectorize.finish_cost. */
5584 static void
5585 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5586 unsigned *body_cost, unsigned *epilogue_cost)
5588 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5590 if (cost_data->loop_info)
5591 rs6000_density_test (cost_data);
5593 *prologue_cost = cost_data->cost[vect_prologue];
5594 *body_cost = cost_data->cost[vect_body];
5595 *epilogue_cost = cost_data->cost[vect_epilogue];
5598 /* Implement targetm.vectorize.destroy_cost_data. */
5600 static void
5601 rs6000_destroy_cost_data (void *data)
5603 free (data);
5606 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5607 library with vectorized intrinsics. */
5609 static tree
5610 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5611 tree type_in)
5613 char name[32];
5614 const char *suffix = NULL;
5615 tree fntype, new_fndecl, bdecl = NULL_TREE;
5616 int n_args = 1;
5617 const char *bname;
5618 machine_mode el_mode, in_mode;
5619 int n, in_n;
5621 /* Libmass is suitable for unsafe math only as it does not correctly support
5622 parts of IEEE with the required precision such as denormals. Only support
5623 it if we have VSX to use the simd d2 or f4 functions.
5624 XXX: Add variable length support. */
5625 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5626 return NULL_TREE;
5628 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5629 n = TYPE_VECTOR_SUBPARTS (type_out);
5630 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5631 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5632 if (el_mode != in_mode
5633 || n != in_n)
5634 return NULL_TREE;
5636 switch (fn)
5638 CASE_CFN_ATAN2:
5639 CASE_CFN_HYPOT:
5640 CASE_CFN_POW:
5641 n_args = 2;
5642 gcc_fallthrough ();
5644 CASE_CFN_ACOS:
5645 CASE_CFN_ACOSH:
5646 CASE_CFN_ASIN:
5647 CASE_CFN_ASINH:
5648 CASE_CFN_ATAN:
5649 CASE_CFN_ATANH:
5650 CASE_CFN_CBRT:
5651 CASE_CFN_COS:
5652 CASE_CFN_COSH:
5653 CASE_CFN_ERF:
5654 CASE_CFN_ERFC:
5655 CASE_CFN_EXP2:
5656 CASE_CFN_EXP:
5657 CASE_CFN_EXPM1:
5658 CASE_CFN_LGAMMA:
5659 CASE_CFN_LOG10:
5660 CASE_CFN_LOG1P:
5661 CASE_CFN_LOG2:
5662 CASE_CFN_LOG:
5663 CASE_CFN_SIN:
5664 CASE_CFN_SINH:
5665 CASE_CFN_SQRT:
5666 CASE_CFN_TAN:
5667 CASE_CFN_TANH:
5668 if (el_mode == DFmode && n == 2)
5670 bdecl = mathfn_built_in (double_type_node, fn);
5671 suffix = "d2"; /* pow -> powd2 */
5673 else if (el_mode == SFmode && n == 4)
5675 bdecl = mathfn_built_in (float_type_node, fn);
5676 suffix = "4"; /* powf -> powf4 */
5678 else
5679 return NULL_TREE;
5680 if (!bdecl)
5681 return NULL_TREE;
5682 break;
5684 default:
5685 return NULL_TREE;
5688 gcc_assert (suffix != NULL);
5689 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5690 if (!bname)
5691 return NULL_TREE;
5693 strcpy (name, bname + sizeof ("__builtin_") - 1);
5694 strcat (name, suffix);
5696 if (n_args == 1)
5697 fntype = build_function_type_list (type_out, type_in, NULL);
5698 else if (n_args == 2)
5699 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5700 else
5701 gcc_unreachable ();
5703 /* Build a function declaration for the vectorized function. */
5704 new_fndecl = build_decl (BUILTINS_LOCATION,
5705 FUNCTION_DECL, get_identifier (name), fntype);
5706 TREE_PUBLIC (new_fndecl) = 1;
5707 DECL_EXTERNAL (new_fndecl) = 1;
5708 DECL_IS_NOVOPS (new_fndecl) = 1;
5709 TREE_READONLY (new_fndecl) = 1;
5711 return new_fndecl;
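
/* Illustrative only, not part of GCC: the MASS name mangling performed
   above, e.g. __builtin_pow -> powd2 (V2DF) and __builtin_powf -> powf4
   (V4SF).  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char name[32];
  const char *bname = "__builtin_pow";
  const char *suffix = "d2";

  /* sizeof ("__builtin_") - 1 skips the 10-character prefix.  */
  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, suffix);
  printf ("%s\n", name);	/* powd2 */
  return 0;
}
#endif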
5714 /* Returns a function decl for a vectorized version of the builtin function
5715 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5716 if it is not available. */
5718 static tree
5719 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5720 tree type_in)
5722 machine_mode in_mode, out_mode;
5723 int in_n, out_n;
5725 if (TARGET_DEBUG_BUILTIN)
5726 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5727 combined_fn_name (combined_fn (fn)),
5728 GET_MODE_NAME (TYPE_MODE (type_out)),
5729 GET_MODE_NAME (TYPE_MODE (type_in)));
5731 if (TREE_CODE (type_out) != VECTOR_TYPE
5732 || TREE_CODE (type_in) != VECTOR_TYPE
5733 || !TARGET_VECTORIZE_BUILTINS)
5734 return NULL_TREE;
5736 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5737 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5738 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5739 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5741 switch (fn)
5743 CASE_CFN_COPYSIGN:
5744 if (VECTOR_UNIT_VSX_P (V2DFmode)
5745 && out_mode == DFmode && out_n == 2
5746 && in_mode == DFmode && in_n == 2)
5747 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5748 if (VECTOR_UNIT_VSX_P (V4SFmode)
5749 && out_mode == SFmode && out_n == 4
5750 && in_mode == SFmode && in_n == 4)
5751 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5752 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5753 && out_mode == SFmode && out_n == 4
5754 && in_mode == SFmode && in_n == 4)
5755 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5756 break;
5757 CASE_CFN_CEIL:
5758 if (VECTOR_UNIT_VSX_P (V2DFmode)
5759 && out_mode == DFmode && out_n == 2
5760 && in_mode == DFmode && in_n == 2)
5761 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5762 if (VECTOR_UNIT_VSX_P (V4SFmode)
5763 && out_mode == SFmode && out_n == 4
5764 && in_mode == SFmode && in_n == 4)
5765 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5766 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5767 && out_mode == SFmode && out_n == 4
5768 && in_mode == SFmode && in_n == 4)
5769 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5770 break;
5771 CASE_CFN_FLOOR:
5772 if (VECTOR_UNIT_VSX_P (V2DFmode)
5773 && out_mode == DFmode && out_n == 2
5774 && in_mode == DFmode && in_n == 2)
5775 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5776 if (VECTOR_UNIT_VSX_P (V4SFmode)
5777 && out_mode == SFmode && out_n == 4
5778 && in_mode == SFmode && in_n == 4)
5779 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5780 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5781 && out_mode == SFmode && out_n == 4
5782 && in_mode == SFmode && in_n == 4)
5783 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5784 break;
5785 CASE_CFN_FMA:
5786 if (VECTOR_UNIT_VSX_P (V2DFmode)
5787 && out_mode == DFmode && out_n == 2
5788 && in_mode == DFmode && in_n == 2)
5789 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5790 if (VECTOR_UNIT_VSX_P (V4SFmode)
5791 && out_mode == SFmode && out_n == 4
5792 && in_mode == SFmode && in_n == 4)
5793 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5794 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5795 && out_mode == SFmode && out_n == 4
5796 && in_mode == SFmode && in_n == 4)
5797 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5798 break;
5799 CASE_CFN_TRUNC:
5800 if (VECTOR_UNIT_VSX_P (V2DFmode)
5801 && out_mode == DFmode && out_n == 2
5802 && in_mode == DFmode && in_n == 2)
5803 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5804 if (VECTOR_UNIT_VSX_P (V4SFmode)
5805 && out_mode == SFmode && out_n == 4
5806 && in_mode == SFmode && in_n == 4)
5807 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5808 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5809 && out_mode == SFmode && out_n == 4
5810 && in_mode == SFmode && in_n == 4)
5811 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5812 break;
5813 CASE_CFN_NEARBYINT:
5814 if (VECTOR_UNIT_VSX_P (V2DFmode)
5815 && flag_unsafe_math_optimizations
5816 && out_mode == DFmode && out_n == 2
5817 && in_mode == DFmode && in_n == 2)
5818 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5819 if (VECTOR_UNIT_VSX_P (V4SFmode)
5820 && flag_unsafe_math_optimizations
5821 && out_mode == SFmode && out_n == 4
5822 && in_mode == SFmode && in_n == 4)
5823 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5824 break;
5825 CASE_CFN_RINT:
5826 if (VECTOR_UNIT_VSX_P (V2DFmode)
5827 && !flag_trapping_math
5828 && out_mode == DFmode && out_n == 2
5829 && in_mode == DFmode && in_n == 2)
5830 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5831 if (VECTOR_UNIT_VSX_P (V4SFmode)
5832 && !flag_trapping_math
5833 && out_mode == SFmode && out_n == 4
5834 && in_mode == SFmode && in_n == 4)
5835 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5836 break;
5837 default:
5838 break;
5841 /* Generate calls to libmass if appropriate. */
5842 if (rs6000_veclib_handler)
5843 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5845 return NULL_TREE;
5848 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5850 static tree
5851 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5852 tree type_in)
5854 machine_mode in_mode, out_mode;
5855 int in_n, out_n;
5857 if (TARGET_DEBUG_BUILTIN)
5858 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5859 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5860 GET_MODE_NAME (TYPE_MODE (type_out)),
5861 GET_MODE_NAME (TYPE_MODE (type_in)));
5863 if (TREE_CODE (type_out) != VECTOR_TYPE
5864 || TREE_CODE (type_in) != VECTOR_TYPE
5865 || !TARGET_VECTORIZE_BUILTINS)
5866 return NULL_TREE;
5868 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5869 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5870 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5871 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5873 enum rs6000_builtins fn
5874 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5875 switch (fn)
5877 case RS6000_BUILTIN_RSQRTF:
5878 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5879 && out_mode == SFmode && out_n == 4
5880 && in_mode == SFmode && in_n == 4)
5881 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5882 break;
5883 case RS6000_BUILTIN_RSQRT:
5884 if (VECTOR_UNIT_VSX_P (V2DFmode)
5885 && out_mode == DFmode && out_n == 2
5886 && in_mode == DFmode && in_n == 2)
5887 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5888 break;
5889 case RS6000_BUILTIN_RECIPF:
5890 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5891 && out_mode == SFmode && out_n == 4
5892 && in_mode == SFmode && in_n == 4)
5893 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5894 break;
5895 case RS6000_BUILTIN_RECIP:
5896 if (VECTOR_UNIT_VSX_P (V2DFmode)
5897 && out_mode == DFmode && out_n == 2
5898 && in_mode == DFmode && in_n == 2)
5899 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5900 break;
5901 default:
5902 break;
5904 return NULL_TREE;
5907 /* Default CPU string for rs6000*_file_start functions. */
5908 static const char *rs6000_default_cpu;
5910 /* Do anything needed at the start of the asm file. */
5912 static void
5913 rs6000_file_start (void)
5915 char buffer[80];
5916 const char *start = buffer;
5917 FILE *file = asm_out_file;
5919 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5921 default_file_start ();
5923 if (flag_verbose_asm)
5925 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5927 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5929 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5930 start = "";
5933 if (global_options_set.x_rs6000_cpu_index)
5935 fprintf (file, "%s -mcpu=%s", start,
5936 processor_target_table[rs6000_cpu_index].name);
5937 start = "";
5940 if (global_options_set.x_rs6000_tune_index)
5942 fprintf (file, "%s -mtune=%s", start,
5943 processor_target_table[rs6000_tune_index].name);
5944 start = "";
5947 if (PPC405_ERRATUM77)
5949 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5950 start = "";
5953 #ifdef USING_ELFOS_H
5954 switch (rs6000_sdata)
5956 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5957 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5958 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5959 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5962 if (rs6000_sdata && g_switch_value)
5964 fprintf (file, "%s -G %d", start,
5965 g_switch_value);
5966 start = "";
5968 #endif
5970 if (*start == '\0')
5971 putc ('\n', file);
5974 #ifdef USING_ELFOS_H
5975 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5976 && !global_options_set.x_rs6000_cpu_index)
5978 fputs ("\t.machine ", asm_out_file);
5979 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5980 fputs ("power9\n", asm_out_file);
5981 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5982 fputs ("power8\n", asm_out_file);
5983 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5984 fputs ("power7\n", asm_out_file);
5985 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5986 fputs ("power6\n", asm_out_file);
5987 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5988 fputs ("power5\n", asm_out_file);
5989 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5990 fputs ("power4\n", asm_out_file);
5991 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5992 fputs ("ppc64\n", asm_out_file);
5993 else
5994 fputs ("ppc\n", asm_out_file);
5996 #endif
5998 if (DEFAULT_ABI == ABI_ELFv2)
5999 fprintf (file, "\t.abiversion 2\n");
6003 /* Return nonzero if this function is known to have a null epilogue. */
6005 int
6006 direct_return (void)
6008 if (reload_completed)
6010 rs6000_stack_t *info = rs6000_stack_info ();
6012 if (info->first_gp_reg_save == 32
6013 && info->first_fp_reg_save == 64
6014 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6015 && ! info->lr_save_p
6016 && ! info->cr_save_p
6017 && info->vrsave_size == 0
6018 && ! info->push_p)
6019 return 1;
6022 return 0;
6025 /* Return the number of instructions it takes to form a constant in an
6026 integer register. */
6028 int
6029 num_insns_constant_wide (HOST_WIDE_INT value)
6031 /* signed constant loadable with addi */
6032 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6033 return 1;
6035 /* constant loadable with addis */
6036 else if ((value & 0xffff) == 0
6037 && (value >> 31 == -1 || value >> 31 == 0))
6038 return 1;
6040 else if (TARGET_POWERPC64)
6042 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6043 HOST_WIDE_INT high = value >> 31;
6045 if (high == 0 || high == -1)
6046 return 2;
6048 high >>= 1;
6050 if (low == 0)
6051 return num_insns_constant_wide (high) + 1;
6052 else if (high == 0)
6053 return num_insns_constant_wide (low) + 1;
6054 else
6055 return (num_insns_constant_wide (high)
6056 + num_insns_constant_wide (low) + 1);
6059 else
6060 return 2;
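
/* Illustrative only, not part of GCC: the sign-extended low word and
   high part that num_insns_constant_wide splits a 64-bit constant into
   (the code above uses >> 31 followed by >> 1).  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t value = INT64_C (0x12345678abcd0000);
  int64_t low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
  int64_t high = value >> 32;

  /* high = 0x12345678, low = sign-extended 0xffffffffabcd0000.  */
  printf ("high = %llx, low = %llx\n",
	  (unsigned long long) high, (unsigned long long) low);
  return 0;
}
#endif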
6063 int
6064 num_insns_constant (rtx op, machine_mode mode)
6066 HOST_WIDE_INT low, high;
6068 switch (GET_CODE (op))
6070 case CONST_INT:
6071 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6072 && rs6000_is_valid_and_mask (op, mode))
6073 return 2;
6074 else
6075 return num_insns_constant_wide (INTVAL (op));
6077 case CONST_WIDE_INT:
6079 int i;
6080 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6081 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6082 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6083 return ins;
6086 case CONST_DOUBLE:
6087 if (mode == SFmode || mode == SDmode)
6089 long l;
6091 if (DECIMAL_FLOAT_MODE_P (mode))
6092 REAL_VALUE_TO_TARGET_DECIMAL32
6093 (*CONST_DOUBLE_REAL_VALUE (op), l);
6094 else
6095 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6096 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6099 long l[2];
6100 if (DECIMAL_FLOAT_MODE_P (mode))
6101 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6102 else
6103 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6104 high = l[WORDS_BIG_ENDIAN == 0];
6105 low = l[WORDS_BIG_ENDIAN != 0];
6107 if (TARGET_32BIT)
6108 return (num_insns_constant_wide (low)
6109 + num_insns_constant_wide (high));
6110 else
6112 if ((high == 0 && low >= 0)
6113 || (high == -1 && low < 0))
6114 return num_insns_constant_wide (low);
6116 else if (rs6000_is_valid_and_mask (op, mode))
6117 return 2;
6119 else if (low == 0)
6120 return num_insns_constant_wide (high) + 1;
6122 else
6123 return (num_insns_constant_wide (high)
6124 + num_insns_constant_wide (low) + 1);
6127 default:
6128 gcc_unreachable ();
6132 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6133 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6134 corresponding element of the vector, but for V4SFmode and V2SFmode,
6135 the corresponding "float" is interpreted as an SImode integer. */
6137 HOST_WIDE_INT
6138 const_vector_elt_as_int (rtx op, unsigned int elt)
6140 rtx tmp;
6142 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6143 gcc_assert (GET_MODE (op) != V2DImode
6144 && GET_MODE (op) != V2DFmode);
6146 tmp = CONST_VECTOR_ELT (op, elt);
6147 if (GET_MODE (op) == V4SFmode
6148 || GET_MODE (op) == V2SFmode)
6149 tmp = gen_lowpart (SImode, tmp);
6150 return INTVAL (tmp);
6153 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6154 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6155 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6156 all items are set to the same value and contain COPIES replicas of the
6157 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6158 operand and the others are set to the value of the operand's msb. */
6160 static bool
6161 vspltis_constant (rtx op, unsigned step, unsigned copies)
6163 machine_mode mode = GET_MODE (op);
6164 machine_mode inner = GET_MODE_INNER (mode);
6166 unsigned i;
6167 unsigned nunits;
6168 unsigned bitsize;
6169 unsigned mask;
6171 HOST_WIDE_INT val;
6172 HOST_WIDE_INT splat_val;
6173 HOST_WIDE_INT msb_val;
6175 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6176 return false;
6178 nunits = GET_MODE_NUNITS (mode);
6179 bitsize = GET_MODE_BITSIZE (inner);
6180 mask = GET_MODE_MASK (inner);
6182 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6183 splat_val = val;
6184 msb_val = val >= 0 ? 0 : -1;
6186 /* Construct the value to be splatted, if possible. If not, return 0. */
6187 for (i = 2; i <= copies; i *= 2)
6189 HOST_WIDE_INT small_val;
6190 bitsize /= 2;
6191 small_val = splat_val >> bitsize;
6192 mask >>= bitsize;
6193 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6194 return false;
6195 splat_val = small_val;
6198 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6199 if (EASY_VECTOR_15 (splat_val))
6202 /* Also check if we can splat, and then add the result to itself. Do so if
6203 the value is positive, or if the splat instruction is using OP's mode;
6204 for splat_val < 0, the splat and the add should use the same mode. */
6205 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6206 && (splat_val >= 0 || (step == 1 && copies == 1)))
6209 /* Also check if we are loading up the most significant bit, which can be done
6210 loading up -1 and shifting the value left by -1. */
6211 else if (EASY_VECTOR_MSB (splat_val, inner))
6214 else
6215 return false;
6217 /* Check if VAL is present in every STEP-th element, and the
6218 other elements are filled with its most significant bit. */
6219 for (i = 1; i < nunits; ++i)
6221 HOST_WIDE_INT desired_val;
6222 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6223 if ((i & (step - 1)) == 0)
6224 desired_val = val;
6225 else
6226 desired_val = msb_val;
6228 if (desired_val != const_vector_elt_as_int (op, elt))
6229 return false;
6232 return true;
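
/* Illustrative only, not part of GCC: the halving check used above to
   decide whether a wider splat value consists of COPIES replicas of a
   narrower one (here, a 32-bit word made of two 16-bit copies).  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t splat_val = 0x00050005;	/* Two copies of 0x0005.  */
  unsigned bitsize = 32;
  unsigned mask = 0xffffffff;
  int64_t small_val;

  bitsize /= 2;				/* 16 */
  mask >>= bitsize;			/* 0xffff */
  small_val = splat_val >> bitsize;	/* 0x0005 */
  if (splat_val == ((small_val << bitsize) | (small_val & mask)))
    printf ("splattable as the 16-bit value 0x%llx\n",
	    (unsigned long long) small_val);
  return 0;
}
#endif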
6235 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6236 instruction, filling in the bottom elements with 0 or -1.
6238 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6239 for the number of zeroes to shift in, or negative for the number of 0xff
6240 bytes to shift in.
6242 OP is a CONST_VECTOR. */
6244 int
6245 vspltis_shifted (rtx op)
6247 machine_mode mode = GET_MODE (op);
6248 machine_mode inner = GET_MODE_INNER (mode);
6250 unsigned i, j;
6251 unsigned nunits;
6252 unsigned mask;
6254 HOST_WIDE_INT val;
6256 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6257 return false;
6259 /* We need to create pseudo registers to do the shift, so don't recognize
6260 shift vector constants after reload. */
6261 if (!can_create_pseudo_p ())
6262 return false;
6264 nunits = GET_MODE_NUNITS (mode);
6265 mask = GET_MODE_MASK (inner);
6267 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6269 /* Check if the value can really be the operand of a vspltis[bhw]. */
6270 if (EASY_VECTOR_15 (val))
6273 /* Also check if we are loading up the most significant bit, which can be done
6274 by loading up -1 and shifting the value left by -1. */
6275 else if (EASY_VECTOR_MSB (val, inner))
6278 else
6279 return 0;
6281 /* Check if VAL is present in every STEP-th element until we find elements
6282 that are 0 or all 1 bits. */
6283 for (i = 1; i < nunits; ++i)
6285 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6286 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6288 /* If the value isn't the splat value, check for the remaining elements
6289 being 0/-1. */
6290 if (val != elt_val)
6292 if (elt_val == 0)
6294 for (j = i+1; j < nunits; ++j)
6296 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6297 if (const_vector_elt_as_int (op, elt2) != 0)
6298 return 0;
6301 return (nunits - i) * GET_MODE_SIZE (inner);
6304 else if ((elt_val & mask) == mask)
6306 for (j = i+1; j < nunits; ++j)
6308 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6309 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6310 return 0;
6313 return -((nunits - i) * GET_MODE_SIZE (inner));
6316 else
6317 return 0;
6321 /* If all elements are equal, we don't need to do VSLDOI. */
6322 return 0;
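
/* Illustrative only, not part of GCC: how a caller interprets the
   return value of vspltis_shifted, per the comment above the
   function.  */
#if 0
#include <stdio.h>

static void
describe_vsldoi (int shift)
{
  if (shift > 0)
    printf ("splat, then vsldoi shifts in %d zero bytes\n", shift);
  else if (shift < 0)
    printf ("splat, then vsldoi shifts in %d 0xff bytes\n", -shift);
  else
    printf ("not representable as a shifted splat\n");
}
#endif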
6326 /* Return true if OP is of the given MODE and can be synthesized
6327 with a vspltisb, vspltish or vspltisw. */
6329 bool
6330 easy_altivec_constant (rtx op, machine_mode mode)
6332 unsigned step, copies;
6334 if (mode == VOIDmode)
6335 mode = GET_MODE (op);
6336 else if (mode != GET_MODE (op))
6337 return false;
6339 /* V2DI/V2DF were added with VSX. Only allow 0 and all 1's as easy
6340 constants. */
6341 if (mode == V2DFmode)
6342 return zero_constant (op, mode);
6344 else if (mode == V2DImode)
6346 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6347 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6348 return false;
6350 if (zero_constant (op, mode))
6351 return true;
6353 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6354 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6355 return true;
6357 return false;
6360 /* V1TImode is a special container for TImode. Ignore for now. */
6361 else if (mode == V1TImode)
6362 return false;
6364 /* Start with a vspltisw. */
6365 step = GET_MODE_NUNITS (mode) / 4;
6366 copies = 1;
6368 if (vspltis_constant (op, step, copies))
6369 return true;
6371 /* Then try with a vspltish. */
6372 if (step == 1)
6373 copies <<= 1;
6374 else
6375 step >>= 1;
6377 if (vspltis_constant (op, step, copies))
6378 return true;
6380 /* And finally a vspltisb. */
6381 if (step == 1)
6382 copies <<= 1;
6383 else
6384 step >>= 1;
6386 if (vspltis_constant (op, step, copies))
6387 return true;
6389 if (vspltis_shifted (op) != 0)
6390 return true;
6392 return false;
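
/* Illustrative only, not part of GCC: the step/copies ladder tried
   above.  For V8HImode (8 units) the three probes correspond to
   vspltisw, vspltish and vspltisb.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned nunits = 8;			/* e.g. V8HImode */
  unsigned step = nunits / 4;
  unsigned copies = 1;
  const char *insns[] = { "vspltisw", "vspltish", "vspltisb" };
  int i;

  for (i = 0; i < 3; i++)
    {
      printf ("%s: step=%u copies=%u\n", insns[i], step, copies);
      if (step == 1)
	copies <<= 1;
      else
	step >>= 1;
    }
  return 0;
}
#endif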
6395 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6396 result is OP. Abort if it is not possible. */
6398 rtx
6399 gen_easy_altivec_constant (rtx op)
6401 machine_mode mode = GET_MODE (op);
6402 int nunits = GET_MODE_NUNITS (mode);
6403 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6404 unsigned step = nunits / 4;
6405 unsigned copies = 1;
6407 /* Start with a vspltisw. */
6408 if (vspltis_constant (op, step, copies))
6409 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6411 /* Then try with a vspltish. */
6412 if (step == 1)
6413 copies <<= 1;
6414 else
6415 step >>= 1;
6417 if (vspltis_constant (op, step, copies))
6418 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6420 /* And finally a vspltisb. */
6421 if (step == 1)
6422 copies <<= 1;
6423 else
6424 step >>= 1;
6426 if (vspltis_constant (op, step, copies))
6427 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6429 gcc_unreachable ();
6432 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6433 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6435 Return the number of instructions needed (1 or 2) via the address
6436 pointed to by NUM_INSNS_PTR.
6438 Return the constant that is being split via CONSTANT_PTR. */
6440 bool
6441 xxspltib_constant_p (rtx op,
6442 machine_mode mode,
6443 int *num_insns_ptr,
6444 int *constant_ptr)
6446 size_t nunits = GET_MODE_NUNITS (mode);
6447 size_t i;
6448 HOST_WIDE_INT value;
6449 rtx element;
6451 /* Set the returned values to out-of-bounds values. */
6452 *num_insns_ptr = -1;
6453 *constant_ptr = 256;
6455 if (!TARGET_P9_VECTOR)
6456 return false;
6458 if (mode == VOIDmode)
6459 mode = GET_MODE (op);
6461 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6462 return false;
6464 /* Handle (vec_duplicate <constant>). */
6465 if (GET_CODE (op) == VEC_DUPLICATE)
6467 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6468 && mode != V2DImode)
6469 return false;
6471 element = XEXP (op, 0);
6472 if (!CONST_INT_P (element))
6473 return false;
6475 value = INTVAL (element);
6476 if (!IN_RANGE (value, -128, 127))
6477 return false;
6480 /* Handle (const_vector [...]). */
6481 else if (GET_CODE (op) == CONST_VECTOR)
6483 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6484 && mode != V2DImode)
6485 return false;
6487 element = CONST_VECTOR_ELT (op, 0);
6488 if (!CONST_INT_P (element))
6489 return false;
6491 value = INTVAL (element);
6492 if (!IN_RANGE (value, -128, 127))
6493 return false;
6495 for (i = 1; i < nunits; i++)
6497 element = CONST_VECTOR_ELT (op, i);
6498 if (!CONST_INT_P (element))
6499 return false;
6501 if (value != INTVAL (element))
6502 return false;
6506 /* Handle integer constants being loaded into the upper part of the VSX
6507 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6508 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6509 else if (CONST_INT_P (op))
6511 if (!SCALAR_INT_MODE_P (mode))
6512 return false;
6514 value = INTVAL (op);
6515 if (!IN_RANGE (value, -128, 127))
6516 return false;
6518 if (!IN_RANGE (value, -1, 0))
6520 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6521 return false;
6523 if (EASY_VECTOR_15 (value))
6524 return false;
6528 else
6529 return false;
6531 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6532 sign extend. Special case 0/-1 to allow getting any VSX register instead
6533 of an Altivec register. */
6534 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6535 && EASY_VECTOR_15 (value))
6536 return false;
6538 /* Return # of instructions and the constant byte for XXSPLTIB. */
6539 if (mode == V16QImode)
6540 *num_insns_ptr = 1;
6542 else if (IN_RANGE (value, -1, 0))
6543 *num_insns_ptr = 1;
6545 else
6546 *num_insns_ptr = 2;
6548 *constant_ptr = (int) value;
6549 return true;
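
/* Illustrative only, not part of GCC: the instruction-count rule the
   function above encodes.  The helper below is hypothetical.  */
#if 0
#include <stdio.h>

static int
xxspltib_insns (int is_v16qi, int value)
{
  if (value < -128 || value > 127)
    return -1;				/* Out of xxspltib's range.  */
  if (is_v16qi || (value >= -1 && value <= 0))
    return 1;				/* xxspltib alone.  */
  return 2;				/* xxspltib + sign extend.  */
}

int
main (void)
{
  printf ("%d %d %d\n",
	  xxspltib_insns (1, 100),	/* 1 */
	  xxspltib_insns (0, -1),	/* 1 */
	  xxspltib_insns (0, 42));	/* 2 */
  return 0;
}
#endif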
6552 const char *
6553 output_vec_const_move (rtx *operands)
6555 int cst, cst2, shift;
6556 machine_mode mode;
6557 rtx dest, vec;
6559 dest = operands[0];
6560 vec = operands[1];
6561 mode = GET_MODE (dest);
6563 if (TARGET_VSX)
6565 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6566 int xxspltib_value = 256;
6567 int num_insns = -1;
6569 if (zero_constant (vec, mode))
6571 if (TARGET_P9_VECTOR)
6572 return "xxspltib %x0,0";
6574 else if (dest_vmx_p)
6575 return "vspltisw %0,0";
6577 else
6578 return "xxlxor %x0,%x0,%x0";
6581 if (all_ones_constant (vec, mode))
6583 if (TARGET_P9_VECTOR)
6584 return "xxspltib %x0,255";
6586 else if (dest_vmx_p)
6587 return "vspltisw %0,-1";
6589 else if (TARGET_P8_VECTOR)
6590 return "xxlorc %x0,%x0,%x0";
6592 else
6593 gcc_unreachable ();
6596 if (TARGET_P9_VECTOR
6597 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6599 if (num_insns == 1)
6601 operands[2] = GEN_INT (xxspltib_value & 0xff);
6602 return "xxspltib %x0,%2";
6605 return "#";
6609 if (TARGET_ALTIVEC)
6611 rtx splat_vec;
6613 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6614 if (zero_constant (vec, mode))
6615 return "vspltisw %0,0";
6617 if (all_ones_constant (vec, mode))
6618 return "vspltisw %0,-1";
6620 /* Do we need to construct a value using VSLDOI? */
6621 shift = vspltis_shifted (vec);
6622 if (shift != 0)
6623 return "#";
6625 splat_vec = gen_easy_altivec_constant (vec);
6626 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6627 operands[1] = XEXP (splat_vec, 0);
6628 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6629 return "#";
6631 switch (GET_MODE (splat_vec))
6633 case V4SImode:
6634 return "vspltisw %0,%1";
6636 case V8HImode:
6637 return "vspltish %0,%1";
6639 case V16QImode:
6640 return "vspltisb %0,%1";
6642 default:
6643 gcc_unreachable ();
6647 gcc_assert (TARGET_SPE);
6649 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6650 pattern of V1DI, V4HI, and V2SF.
6652 FIXME: We should probably return # and add post reload
6653 splitters for these, but this way is so easy ;-). */
6654 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6655 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6656 operands[1] = CONST_VECTOR_ELT (vec, 0);
6657 operands[2] = CONST_VECTOR_ELT (vec, 1);
6658 if (cst == cst2)
6659 return "li %0,%1\n\tevmergelo %0,%0,%0";
6660 else if (WORDS_BIG_ENDIAN)
6661 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6662 else
6663 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6666 /* Initialize the PAIRED vector register TARGET to VALS. */
6668 void
6669 paired_expand_vector_init (rtx target, rtx vals)
6671 machine_mode mode = GET_MODE (target);
6672 int n_elts = GET_MODE_NUNITS (mode);
6673 int n_var = 0;
6674 rtx x, new_rtx, tmp, constant_op, op1, op2;
6675 int i;
6677 for (i = 0; i < n_elts; ++i)
6679 x = XVECEXP (vals, 0, i);
6680 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6681 ++n_var;
6683 if (n_var == 0)
6685 /* Load from constant pool. */
6686 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6687 return;
6690 if (n_var == 2)
6692 /* The vector is initialized only with non-constants. */
6693 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6694 XVECEXP (vals, 0, 1));
6696 emit_move_insn (target, new_rtx);
6697 return;
6700 /* One field is non-constant and the other one is a constant. Load the
6701 constant from the constant pool and use the ps_merge instruction to
6702 construct the whole vector. */
6703 op1 = XVECEXP (vals, 0, 0);
6704 op2 = XVECEXP (vals, 0, 1);
6706 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6708 tmp = gen_reg_rtx (GET_MODE (constant_op));
6709 emit_move_insn (tmp, constant_op);
6711 if (CONSTANT_P (op1))
6712 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6713 else
6714 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6716 emit_move_insn (target, new_rtx);
6719 void
6720 paired_expand_vector_move (rtx operands[])
6722 rtx op0 = operands[0], op1 = operands[1];
6724 emit_move_insn (op0, op1);
6727 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6728 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6729 operands for the relation operation COND. This is a recursive
6730 function. */
6732 static void
6733 paired_emit_vector_compare (enum rtx_code rcode,
6734 rtx dest, rtx op0, rtx op1,
6735 rtx cc_op0, rtx cc_op1)
6737 rtx tmp = gen_reg_rtx (V2SFmode);
6738 rtx tmp1, max, min;
6740 gcc_assert (TARGET_PAIRED_FLOAT);
6741 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6743 switch (rcode)
6745 case LT:
6746 case LTU:
6747 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6748 return;
6749 case GE:
6750 case GEU:
6751 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6752 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6753 return;
6754 case LE:
6755 case LEU:
6756 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6757 return;
6758 case GT:
6759 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6760 return;
6761 case EQ:
6762 tmp1 = gen_reg_rtx (V2SFmode);
6763 max = gen_reg_rtx (V2SFmode);
6764 min = gen_reg_rtx (V2SFmode);
6767 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6768 emit_insn (gen_selv2sf4
6769 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6770 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6771 emit_insn (gen_selv2sf4
6772 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6773 emit_insn (gen_subv2sf3 (tmp1, min, max));
6774 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6775 return;
6776 case NE:
6777 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6778 return;
6779 case UNLE:
6780 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6781 return;
6782 case UNLT:
6783 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6784 return;
6785 case UNGE:
6786 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6787 return;
6788 case UNGT:
6789 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6790 return;
6791 default:
6792 gcc_unreachable ();
6795 return;
6798 /* Emit vector conditional expression.
6799 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6800 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6802 int
6803 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6804 rtx cond, rtx cc_op0, rtx cc_op1)
6806 enum rtx_code rcode = GET_CODE (cond);
6808 if (!TARGET_PAIRED_FLOAT)
6809 return 0;
6811 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6813 return 1;
6816 /* Initialize vector TARGET to VALS. */
6818 void
6819 rs6000_expand_vector_init (rtx target, rtx vals)
6821 machine_mode mode = GET_MODE (target);
6822 machine_mode inner_mode = GET_MODE_INNER (mode);
6823 int n_elts = GET_MODE_NUNITS (mode);
6824 int n_var = 0, one_var = -1;
6825 bool all_same = true, all_const_zero = true;
6826 rtx x, mem;
6827 int i;
6829 for (i = 0; i < n_elts; ++i)
6831 x = XVECEXP (vals, 0, i);
6832 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6833 ++n_var, one_var = i;
6834 else if (x != CONST0_RTX (inner_mode))
6835 all_const_zero = false;
6837 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6838 all_same = false;
6841 if (n_var == 0)
6843 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6844 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6845 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6847 /* Zero register. */
6848 emit_move_insn (target, CONST0_RTX (mode));
6849 return;
6851 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6853 /* Splat immediate. */
6854 emit_insn (gen_rtx_SET (target, const_vec));
6855 return;
6857 else
6859 /* Load from constant pool. */
6860 emit_move_insn (target, const_vec);
6861 return;
6865 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6866 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6868 rtx op0 = XVECEXP (vals, 0, 0);
6869 rtx op1 = XVECEXP (vals, 0, 1);
6870 if (all_same)
6872 if (!MEM_P (op0) && !REG_P (op0))
6873 op0 = force_reg (inner_mode, op0);
6874 if (mode == V2DFmode)
6875 emit_insn (gen_vsx_splat_v2df (target, op0));
6876 else
6877 emit_insn (gen_vsx_splat_v2di (target, op0));
6879 else
6881 op0 = force_reg (inner_mode, op0);
6882 op1 = force_reg (inner_mode, op1);
6883 if (mode == V2DFmode)
6884 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6885 else
6886 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6888 return;
6891 /* Special case initializing vector int if we are on 64-bit systems with
6892 direct move or we have the ISA 3.0 instructions. */
6893 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6894 && TARGET_DIRECT_MOVE_64BIT)
6896 if (all_same)
6898 rtx element0 = XVECEXP (vals, 0, 0);
6899 if (MEM_P (element0))
6900 element0 = rs6000_address_for_fpconvert (element0);
6901 else
6902 element0 = force_reg (SImode, element0);
6904 if (TARGET_P9_VECTOR)
6905 emit_insn (gen_vsx_splat_v4si (target, element0));
6906 else
6908 rtx tmp = gen_reg_rtx (DImode);
6909 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6910 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6912 return;
6914 else
6916 rtx elements[4];
6917 size_t i;
6919 for (i = 0; i < 4; i++)
6921 elements[i] = XVECEXP (vals, 0, i);
6922 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6923 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6926 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6927 elements[2], elements[3]));
6928 return;
6932 /* With single-precision floating point on VSX, note that internally single
6933 precision is actually represented as a double, and either make 2 V2DF
6934 vectors, and convert these vectors to single precision, or do one
6935 conversion, and splat the result to the other elements. */
6936 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6938 if (all_same)
6940 rtx element0 = XVECEXP (vals, 0, 0);
6942 if (TARGET_P9_VECTOR)
6944 if (MEM_P (element0))
6945 element0 = rs6000_address_for_fpconvert (element0);
6947 emit_insn (gen_vsx_splat_v4sf (target, element0));
6950 else
6952 rtx freg = gen_reg_rtx (V4SFmode);
6953 rtx sreg = force_reg (SFmode, element0);
6954 rtx cvt = (TARGET_XSCVDPSPN
6955 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6956 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6958 emit_insn (cvt);
6959 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6960 const0_rtx));
6963 else
6965 rtx dbl_even = gen_reg_rtx (V2DFmode);
6966 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6967 rtx flt_even = gen_reg_rtx (V4SFmode);
6968 rtx flt_odd = gen_reg_rtx (V4SFmode);
6969 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6970 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6971 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6972 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6974 /* Use VMRGEW if we can instead of doing a permute. */
6975 if (TARGET_P8_VECTOR)
6977 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6978 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6979 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6980 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6981 if (BYTES_BIG_ENDIAN)
6982 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6983 else
6984 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6986 else
6988 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6989 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6990 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6991 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6992 rs6000_expand_extract_even (target, flt_even, flt_odd);
6995 return;
6998 /* Special case initializing vector short/char that are splats if we are on
6999 64-bit systems with direct move. */
7000 if (all_same && TARGET_DIRECT_MOVE_64BIT
7001 && (mode == V16QImode || mode == V8HImode))
7003 rtx op0 = XVECEXP (vals, 0, 0);
7004 rtx di_tmp = gen_reg_rtx (DImode);
7006 if (!REG_P (op0))
7007 op0 = force_reg (GET_MODE_INNER (mode), op0);
7009 if (mode == V16QImode)
7011 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7012 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7013 return;
7016 if (mode == V8HImode)
7018 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7019 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7020 return;
7024 /* Store value to stack temp. Load vector element. Splat. However, splat
7025 of 64-bit items is not supported on Altivec. */
7026 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7028 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7029 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7030 XVECEXP (vals, 0, 0));
7031 x = gen_rtx_UNSPEC (VOIDmode,
7032 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7033 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7034 gen_rtvec (2,
7035 gen_rtx_SET (target, mem),
7036 x)));
7037 x = gen_rtx_VEC_SELECT (inner_mode, target,
7038 gen_rtx_PARALLEL (VOIDmode,
7039 gen_rtvec (1, const0_rtx)));
7040 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7041 return;
7044 /* One field is non-constant. Load constant then overwrite
7045 varying field. */
7046 if (n_var == 1)
7048 rtx copy = copy_rtx (vals);
7050 /* Load constant part of vector, substitute neighboring value for
7051 varying element. */
7052 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7053 rs6000_expand_vector_init (target, copy);
7055 /* Insert variable. */
7056 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7057 return;
7060 /* Construct the vector in memory one field at a time
7061 and load the whole vector. */
7062 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7063 for (i = 0; i < n_elts; i++)
7064 emit_move_insn (adjust_address_nv (mem, inner_mode,
7065 i * GET_MODE_SIZE (inner_mode)),
7066 XVECEXP (vals, 0, i));
7067 emit_move_insn (target, mem);
7070 /* Set field ELT of TARGET to VAL. */
7072 void
7073 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7075 machine_mode mode = GET_MODE (target);
7076 machine_mode inner_mode = GET_MODE_INNER (mode);
7077 rtx reg = gen_reg_rtx (mode);
7078 rtx mask, mem, x;
7079 int width = GET_MODE_SIZE (inner_mode);
7080 int i;
7082 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7084 rtx (*set_func) (rtx, rtx, rtx, rtx)
7085 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
7086 emit_insn (set_func (target, target, val, GEN_INT (elt)));
7087 return;
7090 /* Simplify setting single element vectors like V1TImode. */
7091 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7093 emit_move_insn (target, gen_lowpart (mode, val));
7094 return;
7097 /* Load single variable value. */
7098 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7099 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7100 x = gen_rtx_UNSPEC (VOIDmode,
7101 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7102 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7103 gen_rtvec (2,
7104 gen_rtx_SET (reg, mem),
7105 x)));
7107 /* Start with a linear byte sequence, the identity permutation. */
7108 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7109 for (i = 0; i < 16; ++i)
7110 XVECEXP (mask, 0, i) = GEN_INT (i);
7112 /* Set permute mask to insert element into target. */
7113 for (i = 0; i < width; ++i)
7114 XVECEXP (mask, 0, elt*width + i)
7115 = GEN_INT (i + 0x10);
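/* E.g., inserting a 4-byte element at ELT == 1 produces the selector
{ 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, ..., 15 }, so bytes 4..7 of the
result are taken from the first four bytes of REG. */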
7116 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7118 if (BYTES_BIG_ENDIAN)
7119 x = gen_rtx_UNSPEC (mode,
7120 gen_rtvec (3, target, reg,
7121 force_reg (V16QImode, x)),
7122 UNSPEC_VPERM);
7123 else
7125 if (TARGET_P9_VECTOR)
7126 x = gen_rtx_UNSPEC (mode,
7127 gen_rtvec (3, target, reg,
7128 force_reg (V16QImode, x)),
7129 UNSPEC_VPERMR);
7130 else
7132 /* Invert selector. We prefer to generate VNAND on P8 so
7133 that future fusion opportunities can kick in, but must
7134 generate VNOR elsewhere. */
7135 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7136 rtx iorx = (TARGET_P8_VECTOR
7137 ? gen_rtx_IOR (V16QImode, notx, notx)
7138 : gen_rtx_AND (V16QImode, notx, notx));
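/* Both forms compute ~x: (ior (not x) (not x)) matches the vnand
pattern, while (and (not x) (not x)) matches vnor. */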
7139 rtx tmp = gen_reg_rtx (V16QImode);
7140 emit_insn (gen_rtx_SET (tmp, iorx));
7142 /* Permute with operands reversed and adjusted selector. */
7143 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7144 UNSPEC_VPERM);
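/* With the selector inverted and the operands swapped, the VPERM picks
the same bytes the original selector would have picked on a big-endian
machine -- the usual little-endian VPERM workaround. */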
7148 emit_insn (gen_rtx_SET (target, x));
7151 /* Extract field ELT from VEC into TARGET. */
7153 void
7154 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7156 machine_mode mode = GET_MODE (vec);
7157 machine_mode inner_mode = GET_MODE_INNER (mode);
7158 rtx mem;
7160 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7162 switch (mode)
7164 default:
7165 break;
7166 case V1TImode:
7167 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7168 emit_move_insn (target, gen_lowpart (TImode, vec));
7169 break;
7170 case V2DFmode:
7171 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7172 return;
7173 case V2DImode:
7174 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7175 return;
7176 case V4SFmode:
7177 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7178 return;
7179 case V16QImode:
7180 if (TARGET_DIRECT_MOVE_64BIT)
7182 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7183 return;
7185 else
7186 break;
7187 case V8HImode:
7188 if (TARGET_DIRECT_MOVE_64BIT)
7190 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7191 return;
7193 else
7194 break;
7195 case V4SImode:
7196 if (TARGET_DIRECT_MOVE_64BIT)
7198 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7199 return;
7201 break;
7204 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7205 && TARGET_DIRECT_MOVE_64BIT)
7207 if (GET_MODE (elt) != DImode)
7209 rtx tmp = gen_reg_rtx (DImode);
7210 convert_move (tmp, elt, 0);
7211 elt = tmp;
7214 switch (mode)
7216 case V2DFmode:
7217 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7218 return;
7220 case V2DImode:
7221 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7222 return;
7224 case V4SFmode:
7225 if (TARGET_UPPER_REGS_SF)
7227 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7228 return;
7230 break;
7232 case V4SImode:
7233 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7234 return;
7236 case V8HImode:
7237 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7238 return;
7240 case V16QImode:
7241 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7242 return;
7244 default:
7245 gcc_unreachable ();
7249 gcc_assert (CONST_INT_P (elt));
7251 /* Allocate mode-sized buffer. */
7252 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7254 emit_move_insn (mem, vec);
7256 /* Add offset to field within buffer matching vector element. */
7257 mem = adjust_address_nv (mem, inner_mode,
7258 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7260 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7263 /* Helper function to return the register number of an RTX. */
7264 static inline int
7265 regno_or_subregno (rtx op)
7267 if (REG_P (op))
7268 return REGNO (op);
7269 else if (SUBREG_P (op))
7270 return subreg_regno (op);
7271 else
7272 gcc_unreachable ();
7275 /* Adjust a memory address (MEM) of a vector type to point to the scalar
7276 element (ELEMENT) within the vector, which has mode SCALAR_MODE. Use a
7277 base register temporary (BASE_TMP) to fix up the address. Return the new
7278 memory address, valid for reads or writes to the given register (SCALAR_REG). */
7280 static rtx
7281 rs6000_adjust_vec_address (rtx scalar_reg,
7282 rtx mem,
7283 rtx element,
7284 rtx base_tmp,
7285 machine_mode scalar_mode)
7287 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7288 rtx addr = XEXP (mem, 0);
7289 rtx element_offset;
7290 rtx new_addr;
7291 bool valid_addr_p;
7293 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7294 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7296 /* Calculate what we need to add to the address to get the element
7297 address. */
7298 if (CONST_INT_P (element))
7299 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7300 else
7302 int byte_shift = exact_log2 (scalar_size);
7303 gcc_assert (byte_shift >= 0);
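/* E.g., for V4SImode SCALAR_SIZE is 4, so BYTE_SHIFT is 2 and element 3
yields the byte offset 3 << 2 == 12. */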
7305 if (byte_shift == 0)
7306 element_offset = element;
7308 else
7310 if (TARGET_POWERPC64)
7311 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7312 else
7313 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7315 element_offset = base_tmp;
7319 /* Create the new address pointing to the element within the vector. If we
7320 are adding 0, we don't have to change the address. */
7321 if (element_offset == const0_rtx)
7322 new_addr = addr;
7324 /* A simple indirect address can be converted into a reg + offset
7325 address. */
7326 else if (REG_P (addr) || SUBREG_P (addr))
7327 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7329 /* Optimize D-FORM addresses whose offset is constant when the element is
7330 also constant, folding the element offset into the address directly. */
7331 else if (GET_CODE (addr) == PLUS)
7333 rtx op0 = XEXP (addr, 0);
7334 rtx op1 = XEXP (addr, 1);
7335 rtx insn;
7337 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7338 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7340 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7341 rtx offset_rtx = GEN_INT (offset);
7343 if (IN_RANGE (offset, -32768, 32767)
7344 && (scalar_size < 8 || (offset & 0x3) == 0))
7345 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7346 else
7348 emit_move_insn (base_tmp, offset_rtx);
7349 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7352 else
7354 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7355 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7357 /* Note that ADDI requires the register being added to be a base
7358 register. If the register is R0, load it into the temporary
7359 and do the add there. */
7360 if (op1_reg_p
7361 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7363 insn = gen_add3_insn (base_tmp, op1, element_offset);
7364 gcc_assert (insn != NULL_RTX);
7365 emit_insn (insn);
7368 else if (ele_reg_p
7369 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7371 insn = gen_add3_insn (base_tmp, element_offset, op1);
7372 gcc_assert (insn != NULL_RTX);
7373 emit_insn (insn);
7376 else
7378 emit_move_insn (base_tmp, op1);
7379 emit_insn (gen_add2_insn (base_tmp, element_offset));
7382 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7386 else
7388 emit_move_insn (base_tmp, addr);
7389 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7392 /* If we have a PLUS, we need to see whether the particular register class
7393 allows for D-FORM or X-FORM addressing. */
7394 if (GET_CODE (new_addr) == PLUS)
7396 rtx op1 = XEXP (new_addr, 1);
7397 addr_mask_type addr_mask;
7398 int scalar_regno = regno_or_subregno (scalar_reg);
7400 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7401 if (INT_REGNO_P (scalar_regno))
7402 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7404 else if (FP_REGNO_P (scalar_regno))
7405 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7407 else if (ALTIVEC_REGNO_P (scalar_regno))
7408 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7410 else
7411 gcc_unreachable ();
7413 if (REG_P (op1) || SUBREG_P (op1))
7414 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7415 else
7416 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7419 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7420 valid_addr_p = true;
7422 else
7423 valid_addr_p = false;
7425 if (!valid_addr_p)
7427 emit_move_insn (base_tmp, new_addr);
7428 new_addr = base_tmp;
7431 return change_address (mem, scalar_mode, new_addr);
7434 /* Split a variable vec_extract operation into the component instructions. */
7436 void
7437 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7438 rtx tmp_altivec)
7440 machine_mode mode = GET_MODE (src);
7441 machine_mode scalar_mode = GET_MODE (dest);
7442 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7443 int byte_shift = exact_log2 (scalar_size);
7445 gcc_assert (byte_shift >= 0);
7447 /* If we are given a memory address, optimize to load just the element. We
7448 don't have to adjust the vector element number on little endian
7449 systems. */
7450 if (MEM_P (src))
7452 gcc_assert (REG_P (tmp_gpr));
7453 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7454 tmp_gpr, scalar_mode));
7455 return;
7458 else if (REG_P (src) || SUBREG_P (src))
7460 int bit_shift = byte_shift + 3;
7461 rtx element2;
7463 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7465 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7466 an XOR; otherwise we need to subtract. The shift amount is chosen so
7467 that VSLO will shift the element into the upper position (adding 3
7468 converts a byte shift into a bit shift). */
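/* E.g., XORing the element number with 1 swaps elements 0 and 1, which
is exactly the LE<->BE renumbering for a two-element vector; for
V8HImode the subtract maps LE element 2 to 7 - 2 == 5. */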
7469 if (scalar_size == 8)
7471 if (!VECTOR_ELT_ORDER_BIG)
7473 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7474 element2 = tmp_gpr;
7476 else
7477 element2 = element;
7479 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7480 bit. */
7481 emit_insn (gen_rtx_SET (tmp_gpr,
7482 gen_rtx_AND (DImode,
7483 gen_rtx_ASHIFT (DImode,
7484 element2,
7485 GEN_INT (6)),
7486 GEN_INT (64))));
7488 else
7490 if (!VECTOR_ELT_ORDER_BIG)
7492 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7494 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7495 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7496 element2 = tmp_gpr;
7498 else
7499 element2 = element;
7501 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7504 /* Get the value into the lower byte of the Altivec register where VSLO
7505 expects it. */
7506 if (TARGET_P9_VECTOR)
7507 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7508 else if (can_create_pseudo_p ())
7509 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7510 else
7512 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7513 emit_move_insn (tmp_di, tmp_gpr);
7514 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7517 /* Do the VSLO to get the value into the final location. */
7518 switch (mode)
7520 case V2DFmode:
7521 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7522 return;
7524 case V2DImode:
7525 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7526 return;
7528 case V4SFmode:
7530 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7531 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7532 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7533 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7534 tmp_altivec));
7536 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7537 return;
7540 case V4SImode:
7541 case V8HImode:
7542 case V16QImode:
7544 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7545 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7546 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7547 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7548 tmp_altivec));
7549 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7550 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7551 GEN_INT (64 - (8 * scalar_size))));
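/* The arithmetic shift right moves the element from the high end of the
doubleword to the low end and sign-extends it; e.g. for V8HImode
(SCALAR_SIZE == 2) the shift count is 64 - 16 == 48. */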
7552 return;
7555 default:
7556 gcc_unreachable ();
7559 return;
7561 else
7562 gcc_unreachable ();
7565 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7566 two SImode values. */
7568 static void
7569 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7571 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7573 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7575 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7576 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
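/* E.g., SI1 == 1 and SI2 == 2 combine into the DImode constant
0x0000000100000002. */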
7578 emit_move_insn (dest, GEN_INT (const1 | const2));
7579 return;
7582 /* Put si1 into upper 32-bits of dest. */
7583 if (CONST_INT_P (si1))
7584 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7585 else
7587 /* Generate RLDIC. */
7588 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7589 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7590 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7591 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7592 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7593 emit_insn (gen_rtx_SET (dest, and_rtx));
7596 /* Put si2 into the temporary. */
7597 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7598 if (CONST_INT_P (si2))
7599 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7600 else
7601 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7603 /* Combine the two parts. */
7604 emit_insn (gen_iordi3 (dest, dest, tmp));
7605 return;
7608 /* Split a V4SI initialization. */
7610 void
7611 rs6000_split_v4si_init (rtx operands[])
7613 rtx dest = operands[0];
7615 /* Destination is a GPR, build up the two DImode parts in place. */
7616 if (REG_P (dest) || SUBREG_P (dest))
7618 int d_regno = regno_or_subregno (dest);
7619 rtx scalar1 = operands[1];
7620 rtx scalar2 = operands[2];
7621 rtx scalar3 = operands[3];
7622 rtx scalar4 = operands[4];
7623 rtx tmp1 = operands[5];
7624 rtx tmp2 = operands[6];
7626 /* Even though we only need one temporary (plus the destination, which
7627 has an early-clobber constraint), try to use two temporaries, one for
7628 each double word created. That way the second insn scheduling pass can
7629 rearrange things so the two parts are done in parallel. */
7630 if (BYTES_BIG_ENDIAN)
7632 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7633 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7634 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7635 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7637 else
7639 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7640 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7641 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7642 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7643 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7645 return;
7648 else
7649 gcc_unreachable ();
7652 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7654 bool
7655 invalid_e500_subreg (rtx op, machine_mode mode)
7657 if (TARGET_E500_DOUBLE)
7659 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7660 subreg:TI and reg:TF. Decimal float modes are like integer
7661 modes (only low part of each register used) for this
7662 purpose. */
7663 if (GET_CODE (op) == SUBREG
7664 && (mode == SImode || mode == DImode || mode == TImode
7665 || mode == DDmode || mode == TDmode || mode == PTImode)
7666 && REG_P (SUBREG_REG (op))
7667 && (GET_MODE (SUBREG_REG (op)) == DFmode
7668 || GET_MODE (SUBREG_REG (op)) == TFmode
7669 || GET_MODE (SUBREG_REG (op)) == IFmode
7670 || GET_MODE (SUBREG_REG (op)) == KFmode))
7671 return true;
7673 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7674 reg:TI. */
7675 if (GET_CODE (op) == SUBREG
7676 && (mode == DFmode || mode == TFmode || mode == IFmode
7677 || mode == KFmode)
7678 && REG_P (SUBREG_REG (op))
7679 && (GET_MODE (SUBREG_REG (op)) == DImode
7680 || GET_MODE (SUBREG_REG (op)) == TImode
7681 || GET_MODE (SUBREG_REG (op)) == PTImode
7682 || GET_MODE (SUBREG_REG (op)) == DDmode
7683 || GET_MODE (SUBREG_REG (op)) == TDmode))
7684 return true;
7687 if (TARGET_SPE
7688 && GET_CODE (op) == SUBREG
7689 && mode == SImode
7690 && REG_P (SUBREG_REG (op))
7691 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7692 return true;
7694 return false;
7697 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7698 selects whether the alignment is ABI-mandated, optional, or
7699 both ABI-mandated and optional alignment. */
7701 unsigned int
7702 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7704 if (how != align_opt)
7706 if (TREE_CODE (type) == VECTOR_TYPE)
7708 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7709 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7711 if (align < 64)
7712 align = 64;
7714 else if (align < 128)
7715 align = 128;
7717 else if (TARGET_E500_DOUBLE
7718 && TREE_CODE (type) == REAL_TYPE
7719 && TYPE_MODE (type) == DFmode)
7721 if (align < 64)
7722 align = 64;
7726 if (how != align_abi)
7728 if (TREE_CODE (type) == ARRAY_TYPE
7729 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7731 if (align < BITS_PER_WORD)
7732 align = BITS_PER_WORD;
7736 return align;
7739 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7741 bool
7742 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7744 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7746 if (computed != 128)
7748 static bool warned;
7749 if (!warned && warn_psabi)
7751 warned = true;
7752 inform (input_location,
7753 "the layout of aggregates containing vectors with"
7754 " %d-byte alignment has changed in GCC 5",
7755 computed / BITS_PER_UNIT);
7758 /* In current GCC there is no special case. */
7759 return false;
7762 return false;
7765 /* AIX increases natural record alignment to doubleword if the first
7766 field is an FP double while the FP fields remain word aligned. */
7768 unsigned int
7769 rs6000_special_round_type_align (tree type, unsigned int computed,
7770 unsigned int specified)
7772 unsigned int align = MAX (computed, specified);
7773 tree field = TYPE_FIELDS (type);
7775 /* Skip all non-field decls. */
7776 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7777 field = DECL_CHAIN (field);
7779 if (field != NULL && field != type)
7781 type = TREE_TYPE (field);
7782 while (TREE_CODE (type) == ARRAY_TYPE)
7783 type = TREE_TYPE (type);
7785 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7786 align = MAX (align, 64);
7789 return align;
7792 /* Darwin increases record alignment to the natural alignment of
7793 the first field. */
7795 unsigned int
7796 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7797 unsigned int specified)
7799 unsigned int align = MAX (computed, specified);
7801 if (TYPE_PACKED (type))
7802 return align;
7804 /* Find the first field, looking down into aggregates. */
7805 do {
7806 tree field = TYPE_FIELDS (type);
7807 /* Skip all non-field decls. */
7808 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7809 field = DECL_CHAIN (field);
7810 if (! field)
7811 break;
7812 /* A packed field does not contribute any extra alignment. */
7813 if (DECL_PACKED (field))
7814 return align;
7815 type = TREE_TYPE (field);
7816 while (TREE_CODE (type) == ARRAY_TYPE)
7817 type = TREE_TYPE (type);
7818 } while (AGGREGATE_TYPE_P (type));
7820 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7821 align = MAX (align, TYPE_ALIGN (type));
7823 return align;
7826 /* Return 1 for an operand in small memory on V.4/eabi. */
7828 int
7829 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7830 machine_mode mode ATTRIBUTE_UNUSED)
7832 #if TARGET_ELF
7833 rtx sym_ref;
7835 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7836 return 0;
7838 if (DEFAULT_ABI != ABI_V4)
7839 return 0;
7841 /* Vector and float memory instructions have a limited offset on the
7842 SPE, so using a vector or float variable directly as an operand is
7843 not useful. */
7844 if (TARGET_SPE
7845 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7846 return 0;
7848 if (GET_CODE (op) == SYMBOL_REF)
7849 sym_ref = op;
7851 else if (GET_CODE (op) != CONST
7852 || GET_CODE (XEXP (op, 0)) != PLUS
7853 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7854 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7855 return 0;
7857 else
7859 rtx sum = XEXP (op, 0);
7860 HOST_WIDE_INT summand;
7862 /* We have to be careful here, because it is the referenced address
7863 that must be within 32k of _SDA_BASE_, not just the symbol. */
7864 summand = INTVAL (XEXP (sum, 1));
7865 if (summand < 0 || summand > g_switch_value)
7866 return 0;
7868 sym_ref = XEXP (sum, 0);
7871 return SYMBOL_REF_SMALL_P (sym_ref);
7872 #else
7873 return 0;
7874 #endif
7877 /* Return true if either operand is a general purpose register. */
7879 bool
7880 gpr_or_gpr_p (rtx op0, rtx op1)
7882 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7883 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7886 /* Return true if this is a move direct operation between GPR registers and
7887 floating point/VSX registers. */
7889 bool
7890 direct_move_p (rtx op0, rtx op1)
7892 int regno0, regno1;
7894 if (!REG_P (op0) || !REG_P (op1))
7895 return false;
7897 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7898 return false;
7900 regno0 = REGNO (op0);
7901 regno1 = REGNO (op1);
7902 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7903 return false;
7905 if (INT_REGNO_P (regno0))
7906 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7908 else if (INT_REGNO_P (regno1))
7910 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7911 return true;
7913 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7914 return true;
7917 return false;
7920 /* Return true if the OFFSET is valid for the quad address instructions that
7921 use d-form (register + offset) addressing. */
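/* E.g., offsets 0, 16, -16 and 32752 are accepted, while 8 (not 16-byte
aligned) and 32768 (out of range) are rejected. */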
7923 static inline bool
7924 quad_address_offset_p (HOST_WIDE_INT offset)
7926 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
7929 /* Return true if ADDR is an acceptable address for a quad memory
7930 operation of mode MODE (either LQ/STQ for general purpose registers, or
7931 LXV/STXV for vector registers under ISA 3.0). STRICT says whether the
7932 base register must satisfy the strict (post-reload) legitimacy check. */
7935 bool
7936 quad_address_p (rtx addr, machine_mode mode, bool strict)
7938 rtx op0, op1;
7940 if (GET_MODE_SIZE (mode) != 16)
7941 return false;
7943 if (legitimate_indirect_address_p (addr, strict))
7944 return true;
7946 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7947 return false;
7949 if (GET_CODE (addr) != PLUS)
7950 return false;
7952 op0 = XEXP (addr, 0);
7953 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7954 return false;
7956 op1 = XEXP (addr, 1);
7957 if (!CONST_INT_P (op1))
7958 return false;
7960 return quad_address_offset_p (INTVAL (op1));
7963 /* Return true if this is a load or store quad operation. This function does
7964 not handle the atomic quad memory instructions. */
7966 bool
7967 quad_load_store_p (rtx op0, rtx op1)
7969 bool ret;
7971 if (!TARGET_QUAD_MEMORY)
7972 ret = false;
7974 else if (REG_P (op0) && MEM_P (op1))
7975 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7976 && quad_memory_operand (op1, GET_MODE (op1))
7977 && !reg_overlap_mentioned_p (op0, op1));
7979 else if (MEM_P (op0) && REG_P (op1))
7980 ret = (quad_memory_operand (op0, GET_MODE (op0))
7981 && quad_int_reg_operand (op1, GET_MODE (op1)));
7983 else
7984 ret = false;
7986 if (TARGET_DEBUG_ADDR)
7988 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7989 ret ? "true" : "false");
7990 debug_rtx (gen_rtx_SET (op0, op1));
7993 return ret;
7996 /* Given an address, return a constant offset term if one exists. */
7998 static rtx
7999 address_offset (rtx op)
8001 if (GET_CODE (op) == PRE_INC
8002 || GET_CODE (op) == PRE_DEC)
8003 op = XEXP (op, 0);
8004 else if (GET_CODE (op) == PRE_MODIFY
8005 || GET_CODE (op) == LO_SUM)
8006 op = XEXP (op, 1);
8008 if (GET_CODE (op) == CONST)
8009 op = XEXP (op, 0);
8011 if (GET_CODE (op) == PLUS)
8012 op = XEXP (op, 1);
8014 if (CONST_INT_P (op))
8015 return op;
8017 return NULL_RTX;
8020 /* Return true if the MEM operand is a memory operand suitable for use
8021 with a (full width, possibly multiple) gpr load/store. On
8022 powerpc64 this means the offset must be divisible by 4.
8023 Implements 'Y' constraint.
8025 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8026 a constraint function we know the operand has satisfied a suitable
8027 memory predicate. Also accept some odd rtl generated by reload
8028 (see rs6000_legitimize_reload_address for various forms). It is
8029 important that reload rtl be accepted by appropriate constraints
8030 but not by the operand predicate.
8032 Offsetting a lo_sum should not be allowed, except where we know by
8033 alignment that a 32k boundary is not crossed, but see the ???
8034 comment in rs6000_legitimize_reload_address. Note that by
8035 "offsetting" here we mean a further offset to access parts of the
8036 MEM. It's fine to have a lo_sum where the inner address is offset
8037 from a sym, since the same sym+offset will appear in the high part
8038 of the address calculation. */
8040 bool
8041 mem_operand_gpr (rtx op, machine_mode mode)
8043 unsigned HOST_WIDE_INT offset;
8044 int extra;
8045 rtx addr = XEXP (op, 0);
8047 op = address_offset (addr);
8048 if (op == NULL_RTX)
8049 return true;
8051 offset = INTVAL (op);
8052 if (TARGET_POWERPC64 && (offset & 3) != 0)
8053 return false;
8055 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8056 if (extra < 0)
8057 extra = 0;
8059 if (GET_CODE (addr) == LO_SUM)
8060 /* For lo_sum addresses, we must allow any offset except one that
8061 causes a wrap, so test only the low 16 bits. */
8062 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
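/* E.g., an offset of 0x1fffc has low 16 bits 0xfffc, which sign-extends
to -4. The unsigned comparison below then checks
-0x8000 <= offset <= 0x7fff - EXTRA in a single test. */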
8064 return offset + 0x8000 < 0x10000u - extra;
8067 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8068 enforce an offset divisible by 4 even for 32-bit. */
8070 bool
8071 mem_operand_ds_form (rtx op, machine_mode mode)
8073 unsigned HOST_WIDE_INT offset;
8074 int extra;
8075 rtx addr = XEXP (op, 0);
8077 if (!offsettable_address_p (false, mode, addr))
8078 return false;
8080 op = address_offset (addr);
8081 if (op == NULL_RTX)
8082 return true;
8084 offset = INTVAL (op);
8085 if ((offset & 3) != 0)
8086 return false;
8088 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8089 if (extra < 0)
8090 extra = 0;
8092 if (GET_CODE (addr) == LO_SUM)
8093 /* For lo_sum addresses, we must allow any offset except one that
8094 causes a wrap, so test only the low 16 bits. */
8095 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8097 return offset + 0x8000 < 0x10000u - extra;
8100 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8102 static bool
8103 reg_offset_addressing_ok_p (machine_mode mode)
8105 switch (mode)
8107 case V16QImode:
8108 case V8HImode:
8109 case V4SFmode:
8110 case V4SImode:
8111 case V2DFmode:
8112 case V2DImode:
8113 case V1TImode:
8114 case TImode:
8115 case TFmode:
8116 case KFmode:
8117 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8118 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8119 a vector mode, if we want to use the VSX registers to move it around,
8120 we need to restrict ourselves to reg+reg addressing. Similarly for
8121 IEEE 128-bit floating point that is passed in a single vector
8122 register. */
8123 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8124 return mode_supports_vsx_dform_quad (mode);
8125 break;
8127 case V4HImode:
8128 case V2SImode:
8129 case V1DImode:
8130 case V2SFmode:
8131 /* Paired vector modes. Only reg+reg addressing is valid. */
8132 if (TARGET_PAIRED_FLOAT)
8133 return false;
8134 break;
8136 case SDmode:
8137 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8138 addressing for the LFIWZX and STFIWX instructions. */
8139 if (TARGET_NO_SDMODE_STACK)
8140 return false;
8141 break;
8143 default:
8144 break;
8147 return true;
8150 static bool
8151 virtual_stack_registers_memory_p (rtx op)
8153 int regnum;
8155 if (GET_CODE (op) == REG)
8156 regnum = REGNO (op);
8158 else if (GET_CODE (op) == PLUS
8159 && GET_CODE (XEXP (op, 0)) == REG
8160 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8161 regnum = REGNO (XEXP (op, 0));
8163 else
8164 return false;
8166 return (regnum >= FIRST_VIRTUAL_REGISTER
8167 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8170 /* Return true if a MODE-sized memory access to OP plus OFFSET
8171 is known not to straddle a 32k boundary. This function is used
8172 to determine whether -mcmodel=medium code can use TOC pointer
8173 relative addressing for OP. This means the alignment of the TOC
8174 pointer must also be taken into account, and unfortunately that is
8175 only 8 bytes. */
8177 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8178 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8179 #endif
8181 static bool
8182 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8183 machine_mode mode)
8185 tree decl;
8186 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8188 if (GET_CODE (op) != SYMBOL_REF)
8189 return false;
8191 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8192 SYMBOL_REF. */
8193 if (mode_supports_vsx_dform_quad (mode))
8194 return false;
8196 dsize = GET_MODE_SIZE (mode);
8197 decl = SYMBOL_REF_DECL (op);
8198 if (!decl)
8200 if (dsize == 0)
8201 return false;
8203 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8204 replacing memory addresses with an anchor plus offset. We
8205 could find the decl by rummaging around in the block->objects
8206 VEC for the given offset but that seems like too much work. */
8207 dalign = BITS_PER_UNIT;
8208 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8209 && SYMBOL_REF_ANCHOR_P (op)
8210 && SYMBOL_REF_BLOCK (op) != NULL)
8212 struct object_block *block = SYMBOL_REF_BLOCK (op);
8214 dalign = block->alignment;
8215 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8217 else if (CONSTANT_POOL_ADDRESS_P (op))
8219 /* It would be nice to have get_pool_align().. */
8220 machine_mode cmode = get_pool_mode (op);
8222 dalign = GET_MODE_ALIGNMENT (cmode);
8225 else if (DECL_P (decl))
8227 dalign = DECL_ALIGN (decl);
8229 if (dsize == 0)
8231 /* Allow BLKmode when the entire object is known to not
8232 cross a 32k boundary. */
8233 if (!DECL_SIZE_UNIT (decl))
8234 return false;
8236 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8237 return false;
8239 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8240 if (dsize > 32768)
8241 return false;
8243 dalign /= BITS_PER_UNIT;
8244 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8245 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8246 return dalign >= dsize;
8249 else
8250 gcc_unreachable ();
8252 /* Find how many bits of the alignment we know for this access. */
8253 dalign /= BITS_PER_UNIT;
8254 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8255 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8256 mask = dalign - 1;
8257 lsb = offset & -offset;
8258 mask &= lsb - 1;
8259 dalign = mask + 1;
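/* E.g., with a declared alignment of 8 bytes and an offset of 4, the
lowest set bit of the offset limits the known alignment of the access
to 4 bytes. */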
8261 return dalign >= dsize;
8264 static bool
8265 constant_pool_expr_p (rtx op)
8267 rtx base, offset;
8269 split_const (op, &base, &offset);
8270 return (GET_CODE (base) == SYMBOL_REF
8271 && CONSTANT_POOL_ADDRESS_P (base)
8272 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8275 static const_rtx tocrel_base, tocrel_offset;
8277 /* Return true if OP is a toc pointer relative address (the output
8278 of create_TOC_reference). If STRICT, do not match non-split
8279 -mcmodel=large/medium toc pointer relative addresses. */
8281 bool
8282 toc_relative_expr_p (const_rtx op, bool strict)
8284 if (!TARGET_TOC)
8285 return false;
8287 if (TARGET_CMODEL != CMODEL_SMALL)
8289 /* When strict, ensure everything is tidy. */
8290 if (strict
8291 && !(GET_CODE (op) == LO_SUM
8292 && REG_P (XEXP (op, 0))
8293 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8294 return false;
8296 /* When not strict, allow non-split TOC addresses and also allow
8297 (lo_sum (high ..)) TOC addresses created during reload. */
8298 if (GET_CODE (op) == LO_SUM)
8299 op = XEXP (op, 1);
8302 tocrel_base = op;
8303 tocrel_offset = const0_rtx;
8304 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8306 tocrel_base = XEXP (op, 0);
8307 tocrel_offset = XEXP (op, 1);
8310 return (GET_CODE (tocrel_base) == UNSPEC
8311 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8314 /* Return true if X is a constant pool address, and also for cmodel=medium
8315 if X is a toc-relative address known to be offsettable within MODE. */
8317 bool
8318 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8319 bool strict)
8321 return (toc_relative_expr_p (x, strict)
8322 && (TARGET_CMODEL != CMODEL_MEDIUM
8323 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8324 || mode == QImode
8325 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8326 INTVAL (tocrel_offset), mode)));
8329 static bool
8330 legitimate_small_data_p (machine_mode mode, rtx x)
8332 return (DEFAULT_ABI == ABI_V4
8333 && !flag_pic && !TARGET_TOC
8334 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8335 && small_data_operand (x, mode));
8338 /* SPE offset addressing is limited to 5 bits' worth of double words. */
8339 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
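/* I.e., the offset must be a multiple of 8 in the range [0, 248]: a 5-bit
doubleword count scaled by 8. */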
8341 bool
8342 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8343 bool strict, bool worst_case)
8345 unsigned HOST_WIDE_INT offset;
8346 unsigned int extra;
8348 if (GET_CODE (x) != PLUS)
8349 return false;
8350 if (!REG_P (XEXP (x, 0)))
8351 return false;
8352 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8353 return false;
8354 if (mode_supports_vsx_dform_quad (mode))
8355 return quad_address_p (x, mode, strict);
8356 if (!reg_offset_addressing_ok_p (mode))
8357 return virtual_stack_registers_memory_p (x);
8358 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8359 return true;
8360 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8361 return false;
8363 offset = INTVAL (XEXP (x, 1));
8364 extra = 0;
8365 switch (mode)
8367 case V4HImode:
8368 case V2SImode:
8369 case V1DImode:
8370 case V2SFmode:
8371 /* SPE vector modes. */
8372 return SPE_CONST_OFFSET_OK (offset);
8374 case DFmode:
8375 case DDmode:
8376 case DImode:
8377 /* On e500v2, we may have:
8379 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8381 Which gets addressed with evldd instructions. */
8382 if (TARGET_E500_DOUBLE)
8383 return SPE_CONST_OFFSET_OK (offset);
8385 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8386 addressing. */
8387 if (VECTOR_MEM_VSX_P (mode))
8388 return false;
8390 if (!worst_case)
8391 break;
8392 if (!TARGET_POWERPC64)
8393 extra = 4;
8394 else if (offset & 3)
8395 return false;
8396 break;
8398 case TFmode:
8399 case IFmode:
8400 case KFmode:
8401 if (TARGET_E500_DOUBLE)
8402 return (SPE_CONST_OFFSET_OK (offset)
8403 && SPE_CONST_OFFSET_OK (offset + 8));
8404 /* fall through */
8406 case TDmode:
8407 case TImode:
8408 case PTImode:
8409 extra = 8;
8410 if (!worst_case)
8411 break;
8412 if (!TARGET_POWERPC64)
8413 extra = 12;
8414 else if (offset & 3)
8415 return false;
8416 break;
8418 default:
8419 break;
8422 offset += 0x8000;
8423 return offset < 0x10000 - extra;
8426 bool
8427 legitimate_indexed_address_p (rtx x, int strict)
8429 rtx op0, op1;
8431 if (GET_CODE (x) != PLUS)
8432 return false;
8434 op0 = XEXP (x, 0);
8435 op1 = XEXP (x, 1);
8437 /* Recognize the rtl generated by reload which we know will later be
8438 replaced with proper base and index regs. */
8439 if (!strict
8440 && reload_in_progress
8441 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8442 && REG_P (op1))
8443 return true;
8445 return (REG_P (op0) && REG_P (op1)
8446 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8447 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8448 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8449 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8452 bool
8453 avoiding_indexed_address_p (machine_mode mode)
8455 /* Avoid indexed addressing for modes that have non-indexed
8456 load/store instruction forms. */
8457 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8460 bool
8461 legitimate_indirect_address_p (rtx x, int strict)
8463 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8466 bool
8467 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8469 if (!TARGET_MACHO || !flag_pic
8470 || mode != SImode || GET_CODE (x) != MEM)
8471 return false;
8472 x = XEXP (x, 0);
8474 if (GET_CODE (x) != LO_SUM)
8475 return false;
8476 if (GET_CODE (XEXP (x, 0)) != REG)
8477 return false;
8478 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8479 return false;
8480 x = XEXP (x, 1);
8482 return CONSTANT_P (x);
8485 static bool
8486 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8488 if (GET_CODE (x) != LO_SUM)
8489 return false;
8490 if (GET_CODE (XEXP (x, 0)) != REG)
8491 return false;
8492 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8493 return false;
8494 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8495 if (mode_supports_vsx_dform_quad (mode))
8496 return false;
8497 /* Restrict addressing for DI because of our SUBREG hackery. */
8498 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8499 return false;
8500 x = XEXP (x, 1);
8502 if (TARGET_ELF || TARGET_MACHO)
8504 bool large_toc_ok;
8506 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8507 return false;
8508 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
8509 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8510 recognizes some LO_SUM addresses as valid although this
8511 function says the opposite. In most cases, LRA can generate
8512 correct code for address reloads through various transformations,
8513 but it cannot manage some LO_SUM cases. So we need to add
8514 code here, analogous to that in rs6000_legitimize_reload_address
8515 for LO_SUM, saying that some addresses are still valid. */
8516 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8517 && small_toc_ref (x, VOIDmode));
8518 if (TARGET_TOC && ! large_toc_ok)
8519 return false;
8520 if (GET_MODE_NUNITS (mode) != 1)
8521 return false;
8522 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8523 && !(/* ??? Assume floating point reg based on mode? */
8524 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8525 && (mode == DFmode || mode == DDmode)))
8526 return false;
8528 return CONSTANT_P (x) || large_toc_ok;
8531 return false;
8535 /* Try machine-dependent ways of modifying an illegitimate address
8536 to be legitimate. If we find one, return the new, valid address.
8537 This is used from only one place: `memory_address' in explow.c.
8539 OLDX is the address as it was before break_out_memory_refs was
8540 called. In some cases it is useful to look at this to decide what
8541 needs to be done.
8543 It is always safe for this function to do nothing. It exists to
8544 recognize opportunities to optimize the output.
8546 On RS/6000, first check for the sum of a register with a constant
8547 integer that is out of range. If so, generate code to add the
8548 constant with the low-order 16 bits masked to the register and force
8549 this result into another register (this can be done with `cau', the old mnemonic for addis).
8550 Then generate an address of REG+(CONST&0xffff), allowing for the
8551 possibility of bit 16 being a one.
8553 Then check for the sum of a register and something not constant, try to
8554 load the other things into a register and return the sum. */
8556 static rtx
8557 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8558 machine_mode mode)
8560 unsigned int extra;
8562 if (!reg_offset_addressing_ok_p (mode)
8563 || mode_supports_vsx_dform_quad (mode))
8565 if (virtual_stack_registers_memory_p (x))
8566 return x;
8568 /* In theory we should not be seeing addresses of the form reg+0,
8569 but just in case it is generated, optimize it away. */
8570 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8571 return force_reg (Pmode, XEXP (x, 0));
8573 /* For TImode with load/store quad, restrict addresses to just a single
8574 pointer, so it works with both GPRs and VSX registers. */
8575 /* Make sure both operands are registers. */
8576 else if (GET_CODE (x) == PLUS
8577 && (mode != TImode || !TARGET_VSX_TIMODE))
8578 return gen_rtx_PLUS (Pmode,
8579 force_reg (Pmode, XEXP (x, 0)),
8580 force_reg (Pmode, XEXP (x, 1)));
8581 else
8582 return force_reg (Pmode, x);
8584 if (GET_CODE (x) == SYMBOL_REF)
8586 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8587 if (model != 0)
8588 return rs6000_legitimize_tls_address (x, model);
8591 extra = 0;
8592 switch (mode)
8594 case TFmode:
8595 case TDmode:
8596 case TImode:
8597 case PTImode:
8598 case IFmode:
8599 case KFmode:
8600 /* As in legitimate_offset_address_p we do not assume
8601 worst-case. The mode here is just a hint as to the registers
8602 used. A TImode is usually in gprs, but may actually be in
8603 fprs. Leave worst-case scenario for reload to handle via
8604 insn constraints. PTImode is only GPRs. */
8605 extra = 8;
8606 break;
8607 default:
8608 break;
8611 if (GET_CODE (x) == PLUS
8612 && GET_CODE (XEXP (x, 0)) == REG
8613 && GET_CODE (XEXP (x, 1)) == CONST_INT
8614 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8615 >= 0x10000 - extra)
8616 && !(SPE_VECTOR_MODE (mode)
8617 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8619 HOST_WIDE_INT high_int, low_int;
8620 rtx sum;
8621 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8622 if (low_int >= 0x8000 - extra)
8623 low_int = 0;
8624 high_int = INTVAL (XEXP (x, 1)) - low_int;
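/* E.g., reg + 0x12340 splits into HIGH_INT == 0x10000, added into a new
register (typically with addis), plus LOW_INT == 0x2340 left in the
displacement. */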
8625 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8626 GEN_INT (high_int)), 0);
8627 return plus_constant (Pmode, sum, low_int);
8629 else if (GET_CODE (x) == PLUS
8630 && GET_CODE (XEXP (x, 0)) == REG
8631 && GET_CODE (XEXP (x, 1)) != CONST_INT
8632 && GET_MODE_NUNITS (mode) == 1
8633 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8634 || (/* ??? Assume floating point reg based on mode? */
8635 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8636 && (mode == DFmode || mode == DDmode)))
8637 && !avoiding_indexed_address_p (mode))
8639 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8640 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8642 else if (SPE_VECTOR_MODE (mode)
8643 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8645 if (mode == DImode)
8646 return x;
8647 /* We accept [reg + reg] and [reg + OFFSET]. */
8649 if (GET_CODE (x) == PLUS)
8651 rtx op1 = XEXP (x, 0);
8652 rtx op2 = XEXP (x, 1);
8653 rtx y;
8655 op1 = force_reg (Pmode, op1);
8657 if (GET_CODE (op2) != REG
8658 && (GET_CODE (op2) != CONST_INT
8659 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8660 || (GET_MODE_SIZE (mode) > 8
8661 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8662 op2 = force_reg (Pmode, op2);
8664 /* We can't always do [reg + reg] for these, because [reg +
8665 reg + offset] is not a legitimate addressing mode. */
8666 y = gen_rtx_PLUS (Pmode, op1, op2);
8668 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8669 return force_reg (Pmode, y);
8670 else
8671 return y;
8674 return force_reg (Pmode, x);
8676 else if ((TARGET_ELF
8677 #if TARGET_MACHO
8678 || !MACHO_DYNAMIC_NO_PIC_P
8679 #endif
8681 && TARGET_32BIT
8682 && TARGET_NO_TOC
8683 && ! flag_pic
8684 && GET_CODE (x) != CONST_INT
8685 && GET_CODE (x) != CONST_WIDE_INT
8686 && GET_CODE (x) != CONST_DOUBLE
8687 && CONSTANT_P (x)
8688 && GET_MODE_NUNITS (mode) == 1
8689 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8690 || (/* ??? Assume floating point reg based on mode? */
8691 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8692 && (mode == DFmode || mode == DDmode))))
8694 rtx reg = gen_reg_rtx (Pmode);
8695 if (TARGET_ELF)
8696 emit_insn (gen_elf_high (reg, x));
8697 else
8698 emit_insn (gen_macho_high (reg, x));
8699 return gen_rtx_LO_SUM (Pmode, reg, x);
8701 else if (TARGET_TOC
8702 && GET_CODE (x) == SYMBOL_REF
8703 && constant_pool_expr_p (x)
8704 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8705 return create_TOC_reference (x, NULL_RTX);
8706 else
8707 return x;
8710 /* Debug version of rs6000_legitimize_address. */
8711 static rtx
8712 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8714 rtx ret;
8715 rtx_insn *insns;
8717 start_sequence ();
8718 ret = rs6000_legitimize_address (x, oldx, mode);
8719 insns = get_insns ();
8720 end_sequence ();
8722 if (ret != x)
8724 fprintf (stderr,
8725 "\nrs6000_legitimize_address: mode %s, old code %s, "
8726 "new code %s, modified\n",
8727 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8728 GET_RTX_NAME (GET_CODE (ret)));
8730 fprintf (stderr, "Original address:\n");
8731 debug_rtx (x);
8733 fprintf (stderr, "oldx:\n");
8734 debug_rtx (oldx);
8736 fprintf (stderr, "New address:\n");
8737 debug_rtx (ret);
8739 if (insns)
8741 fprintf (stderr, "Insns added:\n");
8742 debug_rtx_list (insns, 20);
8745 else
8747 fprintf (stderr,
8748 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8749 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8751 debug_rtx (x);
8754 if (insns)
8755 emit_insn (insns);
8757 return ret;
8760 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8761 We need to emit DTP-relative relocations. */
8763 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8764 static void
8765 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8767 switch (size)
8769 case 4:
8770 fputs ("\t.long\t", file);
8771 break;
8772 case 8:
8773 fputs (DOUBLE_INT_ASM_OP, file);
8774 break;
8775 default:
8776 gcc_unreachable ();
8778 output_addr_const (file, x);
8779 if (TARGET_ELF)
8780 fputs ("@dtprel+0x8000", file);
8781 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8783 switch (SYMBOL_REF_TLS_MODEL (x))
8785 case 0:
8786 break;
8787 case TLS_MODEL_LOCAL_EXEC:
8788 fputs ("@le", file);
8789 break;
8790 case TLS_MODEL_INITIAL_EXEC:
8791 fputs ("@ie", file);
8792 break;
8793 case TLS_MODEL_GLOBAL_DYNAMIC:
8794 case TLS_MODEL_LOCAL_DYNAMIC:
8795 fputs ("@m", file);
8796 break;
8797 default:
8798 gcc_unreachable ();
8803 /* Return true if X is a symbol that refers to real (rather than emulated)
8804 TLS. */
8806 static bool
8807 rs6000_real_tls_symbol_ref_p (rtx x)
8809 return (GET_CODE (x) == SYMBOL_REF
8810 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8813 /* In the name of slightly smaller debug output, and to cater to
8814 general assembler lossage, recognize various UNSPEC sequences
8815 and turn them back into a direct symbol reference. */
8817 static rtx
8818 rs6000_delegitimize_address (rtx orig_x)
8820 rtx x, y, offset;
8822 orig_x = delegitimize_mem_from_attrs (orig_x);
8823 x = orig_x;
8824 if (MEM_P (x))
8825 x = XEXP (x, 0);
8827 y = x;
8828 if (TARGET_CMODEL != CMODEL_SMALL
8829 && GET_CODE (y) == LO_SUM)
8830 y = XEXP (y, 1);
8832 offset = NULL_RTX;
8833 if (GET_CODE (y) == PLUS
8834 && GET_MODE (y) == Pmode
8835 && CONST_INT_P (XEXP (y, 1)))
8837 offset = XEXP (y, 1);
8838 y = XEXP (y, 0);
8841 if (GET_CODE (y) == UNSPEC
8842 && XINT (y, 1) == UNSPEC_TOCREL)
8844 y = XVECEXP (y, 0, 0);
8846 #ifdef HAVE_AS_TLS
8847 /* Do not associate thread-local symbols with the original
8848 constant pool symbol. */
8849 if (TARGET_XCOFF
8850 && GET_CODE (y) == SYMBOL_REF
8851 && CONSTANT_POOL_ADDRESS_P (y)
8852 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8853 return orig_x;
8854 #endif
8856 if (offset != NULL_RTX)
8857 y = gen_rtx_PLUS (Pmode, y, offset);
8858 if (!MEM_P (orig_x))
8859 return y;
8860 else
8861 return replace_equiv_address_nv (orig_x, y);
8864 if (TARGET_MACHO
8865 && GET_CODE (orig_x) == LO_SUM
8866 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8868 y = XEXP (XEXP (orig_x, 1), 0);
8869 if (GET_CODE (y) == UNSPEC
8870 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8871 return XVECEXP (y, 0, 0);
8874 return orig_x;
8877 /* Return true if X shouldn't be emitted into the debug info.
8878 The linker doesn't like .toc section references from
8879 .debug_* sections, so reject .toc section symbols. */
8881 static bool
8882 rs6000_const_not_ok_for_debug_p (rtx x)
8884 if (GET_CODE (x) == SYMBOL_REF
8885 && CONSTANT_POOL_ADDRESS_P (x))
8887 rtx c = get_pool_constant (x);
8888 machine_mode cmode = get_pool_mode (x);
8889 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8890 return true;
8893 return false;
8896 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8898 static GTY(()) rtx rs6000_tls_symbol;
8899 static rtx
8900 rs6000_tls_get_addr (void)
8902 if (!rs6000_tls_symbol)
8903 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8905 return rs6000_tls_symbol;
8908 /* Construct the SYMBOL_REF for TLS GOT references. */
8910 static GTY(()) rtx rs6000_got_symbol;
8911 static rtx
8912 rs6000_got_sym (void)
8914 if (!rs6000_got_symbol)
8916 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8917 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8918 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8921 return rs6000_got_symbol;
8924 /* AIX Thread-Local Address support. */
8926 static rtx
8927 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8929 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8930 const char *name;
8931 char *tlsname;
8933 name = XSTR (addr, 0);
8934 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8935 or the symbol will be placed in the TLS private data section. */
8936 if (name[strlen (name) - 1] != ']'
8937 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8938 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8940 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8941 strcpy (tlsname, name);
8942 strcat (tlsname,
8943 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8944 tlsaddr = copy_rtx (addr);
8945 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8947 else
8948 tlsaddr = addr;
8950 /* Place addr into TOC constant pool. */
8951 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8953 /* Output the TOC entry and create the MEM referencing the value. */
8954 if (constant_pool_expr_p (XEXP (sym, 0))
8955 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8957 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8958 mem = gen_const_mem (Pmode, tocref);
8959 set_mem_alias_set (mem, get_TOC_alias_set ());
8961 else
8962 return sym;
8964 /* Use global-dynamic for local-dynamic. */
8965 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8966 || model == TLS_MODEL_LOCAL_DYNAMIC)
8968 /* Create new TOC reference for @m symbol. */
8969 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8970 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8971 strcpy (tlsname, "*LCM");
8972 strcat (tlsname, name + 3);
8973 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8974 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8975 tocref = create_TOC_reference (modaddr, NULL_RTX);
8976 rtx modmem = gen_const_mem (Pmode, tocref);
8977 set_mem_alias_set (modmem, get_TOC_alias_set ());
8979 rtx modreg = gen_reg_rtx (Pmode);
8980 emit_insn (gen_rtx_SET (modreg, modmem));
8982 tmpreg = gen_reg_rtx (Pmode);
8983 emit_insn (gen_rtx_SET (tmpreg, mem));
8985 dest = gen_reg_rtx (Pmode);
8986 if (TARGET_32BIT)
8987 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8988 else
8989 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8990 return dest;
8992 /* Obtain the TLS pointer: a call on 32-bit, or GPR 13 on 64-bit. */
8993 else if (TARGET_32BIT)
8995 tlsreg = gen_reg_rtx (SImode);
8996 emit_insn (gen_tls_get_tpointer (tlsreg));
8998 else
8999 tlsreg = gen_rtx_REG (DImode, 13);
9001 /* Load the TOC value into a temporary register. */
9002 tmpreg = gen_reg_rtx (Pmode);
9003 emit_insn (gen_rtx_SET (tmpreg, mem));
9004 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9005 gen_rtx_MINUS (Pmode, addr, tlsreg));
9007 /* Add TOC symbol value to TLS pointer. */
9008 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9010 return dest;
9013 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9014 this (thread-local) address. */
9016 static rtx
9017 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9019 rtx dest, insn;
9021 if (TARGET_XCOFF)
9022 return rs6000_legitimize_tls_address_aix (addr, model);
9024 dest = gen_reg_rtx (Pmode);
9025 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9027 rtx tlsreg;
9029 if (TARGET_64BIT)
9031 tlsreg = gen_rtx_REG (Pmode, 13);
9032 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9034 else
9036 tlsreg = gen_rtx_REG (Pmode, 2);
9037 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9039 emit_insn (insn);
9041 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9043 rtx tlsreg, tmp;
9045 tmp = gen_reg_rtx (Pmode);
9046 if (TARGET_64BIT)
9048 tlsreg = gen_rtx_REG (Pmode, 13);
9049 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9051 else
9053 tlsreg = gen_rtx_REG (Pmode, 2);
9054 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9056 emit_insn (insn);
9057 if (TARGET_64BIT)
9058 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9059 else
9060 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9061 emit_insn (insn);
9063 else
9065 rtx r3, got, tga, tmp1, tmp2, call_insn;
9067 /* We currently use relocations like @got@tlsgd for tls, which
9068 means the linker will handle allocation of tls entries, placing
9069 them in the .got section. So use a pointer to the .got section,
9070 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9071 or to secondary GOT sections used by 32-bit -fPIC. */
9072 if (TARGET_64BIT)
9073 got = gen_rtx_REG (Pmode, 2);
9074 else
9076 if (flag_pic == 1)
9077 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9078 else
9080 rtx gsym = rs6000_got_sym ();
9081 got = gen_reg_rtx (Pmode);
9082 if (flag_pic == 0)
9083 rs6000_emit_move (got, gsym, Pmode);
9084 else
9086 rtx mem, lab, last;
9088 tmp1 = gen_reg_rtx (Pmode);
9089 tmp2 = gen_reg_rtx (Pmode);
9090 mem = gen_const_mem (Pmode, tmp1);
9091 lab = gen_label_rtx ();
9092 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9093 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9094 if (TARGET_LINK_STACK)
9095 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9096 emit_move_insn (tmp2, mem);
9097 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9098 set_unique_reg_note (last, REG_EQUAL, gsym);
9103 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9105 tga = rs6000_tls_get_addr ();
9106 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9107 1, const0_rtx, Pmode);
9109 r3 = gen_rtx_REG (Pmode, 3);
9110 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9112 if (TARGET_64BIT)
9113 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9114 else
9115 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9117 else if (DEFAULT_ABI == ABI_V4)
9118 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9119 else
9120 gcc_unreachable ();
9121 call_insn = last_call_insn ();
9122 PATTERN (call_insn) = insn;
9123 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9124 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9125 pic_offset_table_rtx);
9127 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9129 tga = rs6000_tls_get_addr ();
9130 tmp1 = gen_reg_rtx (Pmode);
9131 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9132 1, const0_rtx, Pmode);
9134 r3 = gen_rtx_REG (Pmode, 3);
9135 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9137 if (TARGET_64BIT)
9138 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9139 else
9140 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9142 else if (DEFAULT_ABI == ABI_V4)
9143 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9144 else
9145 gcc_unreachable ();
9146 call_insn = last_call_insn ();
9147 PATTERN (call_insn) = insn;
9148 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9149 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9150 pic_offset_table_rtx);
9152 if (rs6000_tls_size == 16)
9154 if (TARGET_64BIT)
9155 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9156 else
9157 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9159 else if (rs6000_tls_size == 32)
9161 tmp2 = gen_reg_rtx (Pmode);
9162 if (TARGET_64BIT)
9163 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9164 else
9165 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9166 emit_insn (insn);
9167 if (TARGET_64BIT)
9168 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9169 else
9170 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9172 else
9174 tmp2 = gen_reg_rtx (Pmode);
9175 if (TARGET_64BIT)
9176 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9177 else
9178 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9179 emit_insn (insn);
9180 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9182 emit_insn (insn);
9184 else
9186 /* IE, or 64-bit offset LE. */
9187 tmp2 = gen_reg_rtx (Pmode);
9188 if (TARGET_64BIT)
9189 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9190 else
9191 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9192 emit_insn (insn);
9193 if (TARGET_64BIT)
9194 insn = gen_tls_tls_64 (dest, tmp2, addr);
9195 else
9196 insn = gen_tls_tls_32 (dest, tmp2, addr);
9197 emit_insn (insn);
9201 return dest;
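/* Illustrative sketch (assumed typical output, not taken from this file):
   for 64-bit ELF the sequences built above commonly assemble to

       global-dynamic:                   initial-exec:
         addis 3,2,x@got@tlsgd@ha          addis 9,2,x@got@tprel@ha
         addi  3,3,x@got@tlsgd@l           ld    9,x@got@tprel@l(9)
         bl    __tls_get_addr(x@tlsgd)     add   9,9,x@tls
         nop

   The exact relocations and registers depend on the ABI, flag_pic and
   rs6000_tls_size, so treat this only as an orientation aid.  */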
9204 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9206 static bool
9207 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9209 if (GET_CODE (x) == HIGH
9210 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9211 return true;
9213 /* A TLS symbol in the TOC cannot contain a sum. */
9214 if (GET_CODE (x) == CONST
9215 && GET_CODE (XEXP (x, 0)) == PLUS
9216 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9217 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9218 return true;
9220 /* Do not place an ELF TLS symbol in the constant pool. */
9221 return TARGET_ELF && tls_referenced_p (x);
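/* For instance (illustrative), the predicate above rejects a TOC-relative
   high part such as
     (high:DI (unspec:DI [(symbol_ref ("x")) (reg:DI 2)] UNSPEC_TOCREL))
   and any (const (plus (symbol_ref "x") (const_int N))) where "x" has a
   non-zero SYMBOL_REF_TLS_MODEL, as well as every TLS reference on ELF.  */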
9224 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9225 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9226 can be addressed relative to the toc pointer. */
9228 static bool
9229 use_toc_relative_ref (rtx sym, machine_mode mode)
9231 return ((constant_pool_expr_p (sym)
9232 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9233 get_pool_mode (sym)))
9234 || (TARGET_CMODEL == CMODEL_MEDIUM
9235 && SYMBOL_REF_LOCAL_P (sym)
9236 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9239 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9240 replace the input X, or the original X if no replacement is called for.
9241 The output parameter *WIN is 1 if the calling macro should goto WIN,
9242 0 if it should not.
9244 For RS/6000, we wish to handle large displacements off a base
9245 register by splitting the addend across an addi/addis pair and the mem insn.
9246 This cuts the number of extra insns needed from 3 to 1.
9248 On Darwin, we use this to generate code for floating point constants.
9249 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9250 The Darwin code is inside #if TARGET_MACHO because only then are the
9251 machopic_* functions defined. */
9252 static rtx
9253 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9254 int opnum, int type,
9255 int ind_levels ATTRIBUTE_UNUSED, int *win)
9257 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9258 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9260 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9261 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9262 if (reg_offset_p
9263 && opnum == 1
9264 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9265 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9266 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9267 && TARGET_P9_VECTOR)
9268 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9269 && TARGET_P9_VECTOR)))
9270 reg_offset_p = false;
9272 /* We must recognize output that we have already generated ourselves. */
9273 if (GET_CODE (x) == PLUS
9274 && GET_CODE (XEXP (x, 0)) == PLUS
9275 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9276 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9277 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9279 if (TARGET_DEBUG_ADDR)
9281 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9282 debug_rtx (x);
9284 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9285 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9286 opnum, (enum reload_type) type);
9287 *win = 1;
9288 return x;
9291 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9292 if (GET_CODE (x) == LO_SUM
9293 && GET_CODE (XEXP (x, 0)) == HIGH)
9295 if (TARGET_DEBUG_ADDR)
9297 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9298 debug_rtx (x);
9300 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9301 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9302 opnum, (enum reload_type) type);
9303 *win = 1;
9304 return x;
9307 #if TARGET_MACHO
9308 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9309 && GET_CODE (x) == LO_SUM
9310 && GET_CODE (XEXP (x, 0)) == PLUS
9311 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9312 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9313 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9314 && machopic_operand_p (XEXP (x, 1)))
9316 /* Result of previous invocation of this function on Darwin
9317 floating point constant. */
9318 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9319 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9320 opnum, (enum reload_type) type);
9321 *win = 1;
9322 return x;
9324 #endif
9326 if (TARGET_CMODEL != CMODEL_SMALL
9327 && reg_offset_p
9328 && !quad_offset_p
9329 && small_toc_ref (x, VOIDmode))
9331 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9332 x = gen_rtx_LO_SUM (Pmode, hi, x);
9333 if (TARGET_DEBUG_ADDR)
9335 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9336 debug_rtx (x);
9338 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9339 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9340 opnum, (enum reload_type) type);
9341 *win = 1;
9342 return x;
9345 if (GET_CODE (x) == PLUS
9346 && REG_P (XEXP (x, 0))
9347 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9348 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9349 && CONST_INT_P (XEXP (x, 1))
9350 && reg_offset_p
9351 && !SPE_VECTOR_MODE (mode)
9352 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9353 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9355 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9356 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9357 HOST_WIDE_INT high
9358 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
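/* Worked example (illustrative): for val = 0x1c000 the arithmetic above
   yields low = ((0xc000 ^ 0x8000) - 0x8000) = -0x4000 and high = 0x20000,
   so the address is rebuilt below as (reg + 0x20000) + (-0x4000) and only
   the high part needs to be reloaded into a base register.  */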
9360 /* Check for 32-bit overflow or quad addresses with one of the
9361 four least significant bits set. */
9362 if (high + low != val
9363 || (quad_offset_p && (low & 0xf)))
9365 *win = 0;
9366 return x;
9369 /* Reload the high part into a base reg; leave the low part
9370 in the mem directly. */
9372 x = gen_rtx_PLUS (GET_MODE (x),
9373 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9374 GEN_INT (high)),
9375 GEN_INT (low));
9377 if (TARGET_DEBUG_ADDR)
9379 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9380 debug_rtx (x);
9382 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9383 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9384 opnum, (enum reload_type) type);
9385 *win = 1;
9386 return x;
9389 if (GET_CODE (x) == SYMBOL_REF
9390 && reg_offset_p
9391 && !quad_offset_p
9392 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9393 && !SPE_VECTOR_MODE (mode)
9394 #if TARGET_MACHO
9395 && DEFAULT_ABI == ABI_DARWIN
9396 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9397 && machopic_symbol_defined_p (x)
9398 #else
9399 && DEFAULT_ABI == ABI_V4
9400 && !flag_pic
9401 #endif
9402 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9403 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9404 without fprs.
9405 ??? Assume floating point reg based on mode? This assumption is
9406 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9407 where reload ends up doing a DFmode load of a constant from
9408 mem using two gprs. Unfortunately, at this point reload
9409 hasn't yet selected regs so poking around in reload data
9410 won't help and even if we could figure out the regs reliably,
9411 we'd still want to allow this transformation when the mem is
9412 naturally aligned. Since we say the address is good here, we
9413 can't disable offsets from LO_SUMs in mem_operand_gpr.
9414 FIXME: Allow offset from lo_sum for other modes too, when
9415 mem is sufficiently aligned.
9417 Also disallow this if the type can go in VMX/Altivec registers, since
9418 those registers do not have d-form (reg+offset) address modes. */
9419 && !reg_addr[mode].scalar_in_vmx_p
9420 && mode != TFmode
9421 && mode != TDmode
9422 && mode != IFmode
9423 && mode != KFmode
9424 && (mode != TImode || !TARGET_VSX_TIMODE)
9425 && mode != PTImode
9426 && (mode != DImode || TARGET_POWERPC64)
9427 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9428 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9430 #if TARGET_MACHO
9431 if (flag_pic)
9433 rtx offset = machopic_gen_offset (x);
9434 x = gen_rtx_LO_SUM (GET_MODE (x),
9435 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9436 gen_rtx_HIGH (Pmode, offset)), offset);
9438 else
9439 #endif
9440 x = gen_rtx_LO_SUM (GET_MODE (x),
9441 gen_rtx_HIGH (Pmode, x), x);
9443 if (TARGET_DEBUG_ADDR)
9445 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9446 debug_rtx (x);
9448 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9449 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9450 opnum, (enum reload_type) type);
9451 *win = 1;
9452 return x;
9455 /* Reload an offset address wrapped by an AND that represents the
9456 masking of the lower bits. Strip the outer AND and let reload
9457 convert the offset address into an indirect address. For VSX,
9458 force reload to create the address with an AND in a separate
9459 register, because we can't guarantee an altivec register will
9460 be used. */
9461 if (VECTOR_MEM_ALTIVEC_P (mode)
9462 && GET_CODE (x) == AND
9463 && GET_CODE (XEXP (x, 0)) == PLUS
9464 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9465 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9466 && GET_CODE (XEXP (x, 1)) == CONST_INT
9467 && INTVAL (XEXP (x, 1)) == -16)
9469 x = XEXP (x, 0);
9470 *win = 1;
9471 return x;
9474 if (TARGET_TOC
9475 && reg_offset_p
9476 && !quad_offset_p
9477 && GET_CODE (x) == SYMBOL_REF
9478 && use_toc_relative_ref (x, mode))
9480 x = create_TOC_reference (x, NULL_RTX);
9481 if (TARGET_CMODEL != CMODEL_SMALL)
9483 if (TARGET_DEBUG_ADDR)
9485 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9486 debug_rtx (x);
9488 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9489 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9490 opnum, (enum reload_type) type);
9492 *win = 1;
9493 return x;
9495 *win = 0;
9496 return x;
9499 /* Debug version of rs6000_legitimize_reload_address. */
9500 static rtx
9501 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9502 int opnum, int type,
9503 int ind_levels, int *win)
9505 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9506 ind_levels, win);
9507 fprintf (stderr,
9508 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9509 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9510 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9511 debug_rtx (x);
9513 if (x == ret)
9514 fprintf (stderr, "Same address returned\n");
9515 else if (!ret)
9516 fprintf (stderr, "NULL returned\n");
9517 else
9519 fprintf (stderr, "New address:\n");
9520 debug_rtx (ret);
9523 return ret;
9526 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9527 that is a valid memory address for an instruction.
9528 The MODE argument is the machine mode for the MEM expression
9529 that wants to use this address.
9531 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9532 refers to a constant pool entry of an address (or the sum of it
9533 plus a constant), a short (16-bit signed) constant plus a register,
9534 the sum of two registers, or a register indirect, possibly with an
9535 auto-increment. For DFmode, DDmode and DImode with a constant plus
9536 register, we must ensure that both words are addressable, or on
9537 PowerPC64 that the offset is word aligned.
9539 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9540 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9541 because adjacent memory cells are accessed by adding word-sized offsets
9542 during assembly output. */
9543 static bool
9544 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9546 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9547 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9549 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9550 if (VECTOR_MEM_ALTIVEC_P (mode)
9551 && GET_CODE (x) == AND
9552 && GET_CODE (XEXP (x, 1)) == CONST_INT
9553 && INTVAL (XEXP (x, 1)) == -16)
9554 x = XEXP (x, 0);
9556 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9557 return 0;
9558 if (legitimate_indirect_address_p (x, reg_ok_strict))
9559 return 1;
9560 if (TARGET_UPDATE
9561 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9562 && mode_supports_pre_incdec_p (mode)
9563 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9564 return 1;
9565 /* Handle restricted vector d-form offsets in ISA 3.0. */
9566 if (quad_offset_p)
9568 if (quad_address_p (x, mode, reg_ok_strict))
9569 return 1;
9571 else if (virtual_stack_registers_memory_p (x))
9572 return 1;
9574 else if (reg_offset_p)
9576 if (legitimate_small_data_p (mode, x))
9577 return 1;
9578 if (legitimate_constant_pool_address_p (x, mode,
9579 reg_ok_strict || lra_in_progress))
9580 return 1;
9581 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9582 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9583 return 1;
9586 /* For TImode, if we have TImode in VSX registers, only allow register
9587 indirect addresses. This will allow the values to go in either GPRs
9588 or VSX registers without reloading. The vector types would tend to
9589 go into VSX registers, so we allow REG+REG, while TImode seems
9590 somewhat split, in that some uses are GPR based, and some VSX based. */
9591 /* FIXME: We could loosen this by changing the following to
9592 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9593 but currently we cannot allow REG+REG addressing for TImode. See
9594 PR72827 for complete details on how this ends up hoodwinking DSE. */
9595 if (mode == TImode && TARGET_VSX_TIMODE)
9596 return 0;
9597 /* If not REG_OK_STRICT (i.e. before reload), accept any stack offset. */
9598 if (! reg_ok_strict
9599 && reg_offset_p
9600 && GET_CODE (x) == PLUS
9601 && GET_CODE (XEXP (x, 0)) == REG
9602 && (XEXP (x, 0) == virtual_stack_vars_rtx
9603 || XEXP (x, 0) == arg_pointer_rtx)
9604 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9605 return 1;
9606 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9607 return 1;
9608 if (!FLOAT128_2REG_P (mode)
9609 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9610 || TARGET_POWERPC64
9611 || (mode != DFmode && mode != DDmode)
9612 || (TARGET_E500_DOUBLE && mode != DDmode))
9613 && (TARGET_POWERPC64 || mode != DImode)
9614 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9615 && mode != PTImode
9616 && !avoiding_indexed_address_p (mode)
9617 && legitimate_indexed_address_p (x, reg_ok_strict))
9618 return 1;
9619 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9620 && mode_supports_pre_modify_p (mode)
9621 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9622 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9623 reg_ok_strict, false)
9624 || (!avoiding_indexed_address_p (mode)
9625 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9626 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9627 return 1;
9628 if (reg_offset_p && !quad_offset_p
9629 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9630 return 1;
9631 return 0;
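/* Some example addresses (illustrative) and how the predicate above
   treats them:
     (reg 9)                             register indirect - accepted
     (plus (reg 9) (const_int 32))       16-bit signed offset - accepted
     (plus (reg 9) (reg 10))             indexed - accepted if MODE allows
     (plus (reg 9) (const_int 0x12345))  rejected: the offset does not fit
                                         a signed 16-bit displacement.  */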
9634 /* Debug version of rs6000_legitimate_address_p. */
9635 static bool
9636 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9637 bool reg_ok_strict)
9639 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9640 fprintf (stderr,
9641 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9642 "strict = %d, reload = %s, code = %s\n",
9643 ret ? "true" : "false",
9644 GET_MODE_NAME (mode),
9645 reg_ok_strict,
9646 (reload_completed
9647 ? "after"
9648 : (reload_in_progress ? "progress" : "before")),
9649 GET_RTX_NAME (GET_CODE (x)));
9650 debug_rtx (x);
9652 return ret;
9655 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9657 static bool
9658 rs6000_mode_dependent_address_p (const_rtx addr,
9659 addr_space_t as ATTRIBUTE_UNUSED)
9661 return rs6000_mode_dependent_address_ptr (addr);
9664 /* Go to LABEL if ADDR (a legitimate address expression)
9665 has an effect that depends on the machine mode it is used for.
9667 On the RS/6000 this is true of any address with an integral offset (AltiVec
9668 and VSX modes don't allow them) and of pre-increment/decrement addresses.
9670 ??? Except that due to conceptual problems in offsettable_address_p
9671 we can't really report the problems of integral offsets. So leave
9672 this assuming that the adjustable offset must be valid for the
9673 sub-words of a TFmode operand, which is what we had before. */
9675 static bool
9676 rs6000_mode_dependent_address (const_rtx addr)
9678 switch (GET_CODE (addr))
9680 case PLUS:
9681 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9682 is considered a legitimate address before reload, so there
9683 are no offset restrictions in that case. Note that this
9684 condition is safe in strict mode because any address involving
9685 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9686 been rejected as illegitimate. */
9687 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9688 && XEXP (addr, 0) != arg_pointer_rtx
9689 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9691 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9692 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9694 break;
9696 case LO_SUM:
9697 /* Anything in the constant pool is sufficiently aligned that
9698 all bytes have the same high part address. */
9699 return !legitimate_constant_pool_address_p (addr, QImode, false);
9701 /* Auto-increment cases are now treated generically in recog.c. */
9702 case PRE_MODIFY:
9703 return TARGET_UPDATE;
9705 /* AND is only allowed in Altivec loads. */
9706 case AND:
9707 return true;
9709 default:
9710 break;
9713 return false;
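/* Worked example (illustrative): with TARGET_POWERPC64 an offset of 32760
   (0x7ff8) is flagged as mode dependent, since 0x7ff8 + 0x8000 equals
   0x10000 - 8; the second doubleword of a 16-byte access would need
   offset 32768, which no longer fits the signed 16-bit field.  */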
9716 /* Debug version of rs6000_mode_dependent_address. */
9717 static bool
9718 rs6000_debug_mode_dependent_address (const_rtx addr)
9720 bool ret = rs6000_mode_dependent_address (addr);
9722 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9723 ret ? "true" : "false");
9724 debug_rtx (addr);
9726 return ret;
9729 /* Implement FIND_BASE_TERM. */
9732 rs6000_find_base_term (rtx op)
9734 rtx base;
9736 base = op;
9737 if (GET_CODE (base) == CONST)
9738 base = XEXP (base, 0);
9739 if (GET_CODE (base) == PLUS)
9740 base = XEXP (base, 0);
9741 if (GET_CODE (base) == UNSPEC)
9742 switch (XINT (base, 1))
9744 case UNSPEC_TOCREL:
9745 case UNSPEC_MACHOPIC_OFFSET:
9746 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9747 for aliasing purposes. */
9748 return XVECEXP (base, 0, 0);
9751 return op;
9754 /* More elaborate version of recog's offsettable_memref_p predicate
9755 that works around the ??? note of rs6000_mode_dependent_address.
9756 In particular it accepts
9758 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9760 in 32-bit mode, which the recog predicate rejects.
9762 static bool
9763 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9765 bool worst_case;
9767 if (!MEM_P (op))
9768 return false;
9770 /* First mimic offsettable_memref_p. */
9771 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9772 return true;
9774 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9775 the latter predicate knows nothing about the mode of the memory
9776 reference and, therefore, assumes that it is the largest supported
9777 mode (TFmode). As a consequence, legitimate offsettable memory
9778 references are rejected. rs6000_legitimate_offset_address_p contains
9779 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9780 at least with a little bit of help here given that we know the
9781 actual registers used. */
9782 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9783 || GET_MODE_SIZE (reg_mode) == 4);
9784 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9785 true, worst_case);
9788 /* Determine the reassociation width to be used in reassociate_bb.
9789 This takes into account how many parallel operations we
9790 can actually do of a given type, and also the latency.
9792 int add/sub 6/cycle
9793 mul 2/cycle
9794 vect add/sub/mul 2/cycle
9795 fp add/sub/mul 2/cycle
9796 dfp 1/cycle
9799 static int
9800 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9801 enum machine_mode mode)
9803 switch (rs6000_cpu)
9805 case PROCESSOR_POWER8:
9806 case PROCESSOR_POWER9:
9807 if (DECIMAL_FLOAT_MODE_P (mode))
9808 return 1;
9809 if (VECTOR_MODE_P (mode))
9810 return 4;
9811 if (INTEGRAL_MODE_P (mode))
9812 return opc == MULT_EXPR ? 4 : 6;
9813 if (FLOAT_MODE_P (mode))
9814 return 4;
9815 break;
9816 default:
9817 break;
9819 return 1;
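/* For example (illustrative): with a width of 6 for integer add/sub on
   POWER8/POWER9, a chain like a + b + c + d + e + f + g + h can be
   reassociated into several independent partial sums that issue in
   parallel instead of one serial dependency chain.  */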
9822 /* Change register usage conditional on target flags. */
9823 static void
9824 rs6000_conditional_register_usage (void)
9826 int i;
9828 if (TARGET_DEBUG_TARGET)
9829 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9831 /* Set MQ register fixed (already call_used) so that it will not be
9832 allocated. */
9833 fixed_regs[64] = 1;
9835 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9836 if (TARGET_64BIT)
9837 fixed_regs[13] = call_used_regs[13]
9838 = call_really_used_regs[13] = 1;
9840 /* Conditionally disable FPRs. */
9841 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9842 for (i = 32; i < 64; i++)
9843 fixed_regs[i] = call_used_regs[i]
9844 = call_really_used_regs[i] = 1;
9846 /* The TOC register is not killed across calls in a way that is
9847 visible to the compiler. */
9848 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9849 call_really_used_regs[2] = 0;
9851 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9852 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9854 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9855 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9856 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9857 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9859 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9860 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9861 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9862 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9864 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9865 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9866 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9868 if (TARGET_SPE)
9870 global_regs[SPEFSCR_REGNO] = 1;
9871 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9872 registers in prologues and epilogues. We no longer use r14
9873 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9874 pool for link-compatibility with older versions of GCC. Once
9875 "old" code has died out, we can return r14 to the allocation
9876 pool. */
9877 fixed_regs[14]
9878 = call_used_regs[14]
9879 = call_really_used_regs[14] = 1;
9882 if (!TARGET_ALTIVEC && !TARGET_VSX)
9884 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9885 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9886 call_really_used_regs[VRSAVE_REGNO] = 1;
9889 if (TARGET_ALTIVEC || TARGET_VSX)
9890 global_regs[VSCR_REGNO] = 1;
9892 if (TARGET_ALTIVEC_ABI)
9894 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9895 call_used_regs[i] = call_really_used_regs[i] = 1;
9897 /* AIX reserves VR20:31 in non-extended ABI mode. */
9898 if (TARGET_XCOFF)
9899 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9900 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9905 /* Output insns to set DEST equal to the constant SOURCE as a series of
9906 lis, ori and sldi instructions and return TRUE. */
9908 bool
9909 rs6000_emit_set_const (rtx dest, rtx source)
9911 machine_mode mode = GET_MODE (dest);
9912 rtx temp, set;
9913 rtx_insn *insn;
9914 HOST_WIDE_INT c;
9916 gcc_checking_assert (CONST_INT_P (source));
9917 c = INTVAL (source);
9918 switch (mode)
9920 case QImode:
9921 case HImode:
9922 emit_insn (gen_rtx_SET (dest, source));
9923 return true;
9925 case SImode:
9926 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9928 emit_insn (gen_rtx_SET (copy_rtx (temp),
9929 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9930 emit_insn (gen_rtx_SET (dest,
9931 gen_rtx_IOR (SImode, copy_rtx (temp),
9932 GEN_INT (c & 0xffff))));
9933 break;
9935 case DImode:
9936 if (!TARGET_POWERPC64)
9938 rtx hi, lo;
9940 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9941 DImode);
9942 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9943 DImode);
9944 emit_move_insn (hi, GEN_INT (c >> 32));
9945 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9946 emit_move_insn (lo, GEN_INT (c));
9948 else
9949 rs6000_emit_set_long_const (dest, c);
9950 break;
9952 default:
9953 gcc_unreachable ();
9956 insn = get_last_insn ();
9957 set = single_set (insn);
9958 if (! CONSTANT_P (SET_SRC (set)))
9959 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9961 return true;
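/* Illustrative example (assumed, not from this file): for SImode and
   c = 0x12345678 the code above emits the equivalent of
       lis  rD,0x1234        # rD = 0x12340000
       ori  rD,rD,0x5678     # rD = 0x12345678
   and attaches a REG_EQUAL note recording the full constant.  */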
9964 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9965 Output insns to set DEST equal to the constant C as a series of
9966 lis, ori and sldi instructions. */
9968 static void
9969 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9971 rtx temp;
9972 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9974 ud1 = c & 0xffff;
9975 c = c >> 16;
9976 ud2 = c & 0xffff;
9977 c = c >> 16;
9978 ud3 = c & 0xffff;
9979 c = c >> 16;
9980 ud4 = c & 0xffff;
9982 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9983 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9984 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9986 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9987 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9989 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9991 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9992 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9993 if (ud1 != 0)
9994 emit_move_insn (dest,
9995 gen_rtx_IOR (DImode, copy_rtx (temp),
9996 GEN_INT (ud1)));
9998 else if (ud3 == 0 && ud4 == 0)
10000 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10002 gcc_assert (ud2 & 0x8000);
10003 emit_move_insn (copy_rtx (temp),
10004 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10005 if (ud1 != 0)
10006 emit_move_insn (copy_rtx (temp),
10007 gen_rtx_IOR (DImode, copy_rtx (temp),
10008 GEN_INT (ud1)));
10009 emit_move_insn (dest,
10010 gen_rtx_ZERO_EXTEND (DImode,
10011 gen_lowpart (SImode,
10012 copy_rtx (temp))));
10014 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10015 || (ud4 == 0 && ! (ud3 & 0x8000)))
10017 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10019 emit_move_insn (copy_rtx (temp),
10020 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10021 if (ud2 != 0)
10022 emit_move_insn (copy_rtx (temp),
10023 gen_rtx_IOR (DImode, copy_rtx (temp),
10024 GEN_INT (ud2)));
10025 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10026 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10027 GEN_INT (16)));
10028 if (ud1 != 0)
10029 emit_move_insn (dest,
10030 gen_rtx_IOR (DImode, copy_rtx (temp),
10031 GEN_INT (ud1)));
10033 else
10035 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10037 emit_move_insn (copy_rtx (temp),
10038 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10039 if (ud3 != 0)
10040 emit_move_insn (copy_rtx (temp),
10041 gen_rtx_IOR (DImode, copy_rtx (temp),
10042 GEN_INT (ud3)));
10044 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10045 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10046 GEN_INT (32)));
10047 if (ud2 != 0)
10048 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10049 gen_rtx_IOR (DImode, copy_rtx (temp),
10050 GEN_INT (ud2 << 16)));
10051 if (ud1 != 0)
10052 emit_move_insn (dest,
10053 gen_rtx_IOR (DImode, copy_rtx (temp),
10054 GEN_INT (ud1)));
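/* Illustrative example (assumed typical output): for
   c = 0x123456789abcdef0 the worst case above emits the equivalent of
       lis   rT,0x1234
       ori   rT,rT,0x5678
       sldi  rT,rT,32
       oris  rT,rT,0x9abc
       ori   rD,rT,0xdef0
   i.e. build the high 32 bits, shift them up, then OR in the low ones.  */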
10058 /* Helper for the following. Get rid of [r+r] memory refs
10059 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10061 static void
10062 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10064 if (reload_in_progress)
10065 return;
10067 if (GET_CODE (operands[0]) == MEM
10068 && GET_CODE (XEXP (operands[0], 0)) != REG
10069 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10070 GET_MODE (operands[0]), false))
10071 operands[0]
10072 = replace_equiv_address (operands[0],
10073 copy_addr_to_reg (XEXP (operands[0], 0)));
10075 if (GET_CODE (operands[1]) == MEM
10076 && GET_CODE (XEXP (operands[1], 0)) != REG
10077 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10078 GET_MODE (operands[1]), false))
10079 operands[1]
10080 = replace_equiv_address (operands[1],
10081 copy_addr_to_reg (XEXP (operands[1], 0)));
10084 /* Generate a vector of constants to permute MODE for a little-endian
10085 storage operation by swapping the two halves of a vector. */
10086 static rtvec
10087 rs6000_const_vec (machine_mode mode)
10089 int i, subparts;
10090 rtvec v;
10092 switch (mode)
10094 case V1TImode:
10095 subparts = 1;
10096 break;
10097 case V2DFmode:
10098 case V2DImode:
10099 subparts = 2;
10100 break;
10101 case V4SFmode:
10102 case V4SImode:
10103 subparts = 4;
10104 break;
10105 case V8HImode:
10106 subparts = 8;
10107 break;
10108 case V16QImode:
10109 subparts = 16;
10110 break;
10111 default:
10112 gcc_unreachable();
10115 v = rtvec_alloc (subparts);
10117 for (i = 0; i < subparts / 2; ++i)
10118 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10119 for (i = subparts / 2; i < subparts; ++i)
10120 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10122 return v;
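/* For V4SImode the vector built above is { 2, 3, 0, 1 } (illustrative):
   a selector that swaps the two 64-bit halves of the vector.  */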
10125 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10126 for a VSX load or store operation. */
10128 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10130 /* Use ROTATE instead of VEC_SELECT for IEEE 128-bit floating point and
10131 for 128-bit integers if they are allowed in VSX registers. */
10132 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
10133 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10134 else
10136 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10137 return gen_rtx_VEC_SELECT (mode, source, par);
10141 /* Emit a little-endian load from vector memory location SOURCE to VSX
10142 register DEST in mode MODE. The load is done with two permuting
10143 insns that represent an lxvd2x and xxpermdi. */
10144 void
10145 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10147 rtx tmp, permute_mem, permute_reg;
10149 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10150 V1TImode). */
10151 if (mode == TImode || mode == V1TImode)
10153 mode = V2DImode;
10154 dest = gen_lowpart (V2DImode, dest);
10155 source = adjust_address (source, V2DImode, 0);
10158 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10159 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10160 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10161 emit_insn (gen_rtx_SET (tmp, permute_mem));
10162 emit_insn (gen_rtx_SET (dest, permute_reg));
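/* Sketch of the RTL emitted above for a V4SI load (illustrative):
     (set (reg tmp)  (vec_select:V4SI (mem ...) (parallel [2 3 0 1])))
     (set (reg dest) (vec_select:V4SI (reg tmp) (parallel [2 3 0 1])))
   matching an lxvd2x followed by an xxpermdi; later passes can cancel
   adjacent pairs of such permutes.  */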
10165 /* Emit a little-endian store to vector memory location DEST from VSX
10166 register SOURCE in mode MODE. The store is done with two permuting
10167 insns that represent an xxpermdi and an stxvd2x. */
10168 void
10169 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10171 rtx tmp, permute_src, permute_tmp;
10173 /* This should never be called during or after reload, because it does
10174 not re-permute the source register. It is intended only for use
10175 during expand. */
10176 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10178 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10179 V1TImode). */
10180 if (mode == TImode || mode == V1TImode)
10182 mode = V2DImode;
10183 dest = adjust_address (dest, V2DImode, 0);
10184 source = gen_lowpart (V2DImode, source);
10187 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10188 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10189 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10190 emit_insn (gen_rtx_SET (tmp, permute_src));
10191 emit_insn (gen_rtx_SET (dest, permute_tmp));
10194 /* Emit a sequence representing a little-endian VSX load or store,
10195 moving data from SOURCE to DEST in mode MODE. This is done
10196 separately from rs6000_emit_move to ensure it is called only
10197 during expand. LE VSX loads and stores introduced later are
10198 handled with a split. The expand-time RTL generation allows
10199 us to optimize away redundant pairs of register-permutes. */
10200 void
10201 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10203 gcc_assert (!BYTES_BIG_ENDIAN
10204 && VECTOR_MEM_VSX_P (mode)
10205 && !TARGET_P9_VECTOR
10206 && !gpr_or_gpr_p (dest, source)
10207 && (MEM_P (source) ^ MEM_P (dest)));
10209 if (MEM_P (source))
10211 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10212 rs6000_emit_le_vsx_load (dest, source, mode);
10214 else
10216 if (!REG_P (source))
10217 source = force_reg (mode, source);
10218 rs6000_emit_le_vsx_store (dest, source, mode);
10222 /* Emit a move from SOURCE to DEST in mode MODE. */
10223 void
10224 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10226 rtx operands[2];
10227 operands[0] = dest;
10228 operands[1] = source;
10230 if (TARGET_DEBUG_ADDR)
10232 fprintf (stderr,
10233 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10234 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10235 GET_MODE_NAME (mode),
10236 reload_in_progress,
10237 reload_completed,
10238 can_create_pseudo_p ());
10239 debug_rtx (dest);
10240 fprintf (stderr, "source:\n");
10241 debug_rtx (source);
10244 /* Sanity check. A CONST_WIDE_INT should never appear for a mode that fits in a single HOST_WIDE_INT. */
10245 if (CONST_WIDE_INT_P (operands[1])
10246 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10248 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10249 gcc_unreachable ();
10252 /* Check if GCC is setting up a block move that will end up using FP
10253 registers as temporaries. We must make sure this is acceptable. */
10254 if (GET_CODE (operands[0]) == MEM
10255 && GET_CODE (operands[1]) == MEM
10256 && mode == DImode
10257 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10258 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10259 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10260 ? 32 : MEM_ALIGN (operands[0])))
10261 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10262 ? 32
10263 : MEM_ALIGN (operands[1]))))
10264 && ! MEM_VOLATILE_P (operands [0])
10265 && ! MEM_VOLATILE_P (operands [1]))
10267 emit_move_insn (adjust_address (operands[0], SImode, 0),
10268 adjust_address (operands[1], SImode, 0));
10269 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10270 adjust_address (copy_rtx (operands[1]), SImode, 4));
10271 return;
10274 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10275 && !gpc_reg_operand (operands[1], mode))
10276 operands[1] = force_reg (mode, operands[1]);
10278 /* Recognize the case where operand[1] is a reference to thread-local
10279 data and load its address to a register. */
10280 if (tls_referenced_p (operands[1]))
10282 enum tls_model model;
10283 rtx tmp = operands[1];
10284 rtx addend = NULL;
10286 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10288 addend = XEXP (XEXP (tmp, 0), 1);
10289 tmp = XEXP (XEXP (tmp, 0), 0);
10292 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10293 model = SYMBOL_REF_TLS_MODEL (tmp);
10294 gcc_assert (model != 0);
10296 tmp = rs6000_legitimize_tls_address (tmp, model);
10297 if (addend)
10299 tmp = gen_rtx_PLUS (mode, tmp, addend);
10300 tmp = force_operand (tmp, operands[0]);
10302 operands[1] = tmp;
10305 /* Handle the case where reload calls us with an invalid address. */
10306 if (reload_in_progress && mode == Pmode
10307 && (! general_operand (operands[1], mode)
10308 || ! nonimmediate_operand (operands[0], mode)))
10309 goto emit_set;
10311 /* 128-bit constant floating-point values on Darwin should really be loaded
10312 as two parts. However, this premature splitting is a problem when DFmode
10313 values can go into Altivec registers. */
10314 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10315 && GET_CODE (operands[1]) == CONST_DOUBLE)
10317 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10318 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10319 DFmode);
10320 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10321 GET_MODE_SIZE (DFmode)),
10322 simplify_gen_subreg (DFmode, operands[1], mode,
10323 GET_MODE_SIZE (DFmode)),
10324 DFmode);
10325 return;
10328 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10329 cfun->machine->sdmode_stack_slot =
10330 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10333 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10334 p1:SD) if p1 is not of floating point class and p0 is spilled as
10335 we can have no analogous movsd_store for this. */
10336 if (lra_in_progress && mode == DDmode
10337 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10338 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10339 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10340 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10342 enum reg_class cl;
10343 int regno = REGNO (SUBREG_REG (operands[1]));
10345 if (regno >= FIRST_PSEUDO_REGISTER)
10347 cl = reg_preferred_class (regno);
10348 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10350 if (regno >= 0 && ! FP_REGNO_P (regno))
10352 mode = SDmode;
10353 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10354 operands[1] = SUBREG_REG (operands[1]);
10357 if (lra_in_progress
10358 && mode == SDmode
10359 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10360 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10361 && (REG_P (operands[1])
10362 || (GET_CODE (operands[1]) == SUBREG
10363 && REG_P (SUBREG_REG (operands[1])))))
10365 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10366 ? SUBREG_REG (operands[1]) : operands[1]);
10367 enum reg_class cl;
10369 if (regno >= FIRST_PSEUDO_REGISTER)
10371 cl = reg_preferred_class (regno);
10372 gcc_assert (cl != NO_REGS);
10373 regno = ira_class_hard_regs[cl][0];
10375 if (FP_REGNO_P (regno))
10377 if (GET_MODE (operands[0]) != DDmode)
10378 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10379 emit_insn (gen_movsd_store (operands[0], operands[1]));
10381 else if (INT_REGNO_P (regno))
10382 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10383 else
10384 gcc_unreachable();
10385 return;
10387 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10388 p:DD)) if p0 is not of floating point class and p1 is spilled as
10389 we can have no analogous movsd_load for this. */
10390 if (lra_in_progress && mode == DDmode
10391 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10392 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10393 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10394 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10396 enum reg_class cl;
10397 int regno = REGNO (SUBREG_REG (operands[0]));
10399 if (regno >= FIRST_PSEUDO_REGISTER)
10401 cl = reg_preferred_class (regno);
10402 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10404 if (regno >= 0 && ! FP_REGNO_P (regno))
10406 mode = SDmode;
10407 operands[0] = SUBREG_REG (operands[0]);
10408 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10411 if (lra_in_progress
10412 && mode == SDmode
10413 && (REG_P (operands[0])
10414 || (GET_CODE (operands[0]) == SUBREG
10415 && REG_P (SUBREG_REG (operands[0]))))
10416 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10417 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10419 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10420 ? SUBREG_REG (operands[0]) : operands[0]);
10421 enum reg_class cl;
10423 if (regno >= FIRST_PSEUDO_REGISTER)
10425 cl = reg_preferred_class (regno);
10426 gcc_assert (cl != NO_REGS);
10427 regno = ira_class_hard_regs[cl][0];
10429 if (FP_REGNO_P (regno))
10431 if (GET_MODE (operands[1]) != DDmode)
10432 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10433 emit_insn (gen_movsd_load (operands[0], operands[1]));
10435 else if (INT_REGNO_P (regno))
10436 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10437 else
10438 gcc_unreachable();
10439 return;
10442 if (reload_in_progress
10443 && mode == SDmode
10444 && cfun->machine->sdmode_stack_slot != NULL_RTX
10445 && MEM_P (operands[0])
10446 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10447 && REG_P (operands[1]))
10449 if (FP_REGNO_P (REGNO (operands[1])))
10451 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10452 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10453 emit_insn (gen_movsd_store (mem, operands[1]));
10455 else if (INT_REGNO_P (REGNO (operands[1])))
10457 rtx mem = operands[0];
10458 if (BYTES_BIG_ENDIAN)
10459 mem = adjust_address_nv (mem, mode, 4);
10460 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10461 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10463 else
10464 gcc_unreachable();
10465 return;
10467 if (reload_in_progress
10468 && mode == SDmode
10469 && REG_P (operands[0])
10470 && MEM_P (operands[1])
10471 && cfun->machine->sdmode_stack_slot != NULL_RTX
10472 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10474 if (FP_REGNO_P (REGNO (operands[0])))
10476 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10477 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10478 emit_insn (gen_movsd_load (operands[0], mem));
10480 else if (INT_REGNO_P (REGNO (operands[0])))
10482 rtx mem = operands[1];
10483 if (BYTES_BIG_ENDIAN)
10484 mem = adjust_address_nv (mem, mode, 4);
10485 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10486 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10488 else
10489 gcc_unreachable();
10490 return;
10493 /* FIXME: In the long term, this switch statement should go away
10494 and be replaced by a sequence of tests based on things like
10495 mode == Pmode. */
10496 switch (mode)
10498 case HImode:
10499 case QImode:
10500 if (CONSTANT_P (operands[1])
10501 && GET_CODE (operands[1]) != CONST_INT)
10502 operands[1] = force_const_mem (mode, operands[1]);
10503 break;
10505 case TFmode:
10506 case TDmode:
10507 case IFmode:
10508 case KFmode:
10509 if (FLOAT128_2REG_P (mode))
10510 rs6000_eliminate_indexed_memrefs (operands);
10511 /* fall through */
10513 case DFmode:
10514 case DDmode:
10515 case SFmode:
10516 case SDmode:
10517 if (CONSTANT_P (operands[1])
10518 && ! easy_fp_constant (operands[1], mode))
10519 operands[1] = force_const_mem (mode, operands[1]);
10520 break;
10522 case V16QImode:
10523 case V8HImode:
10524 case V4SFmode:
10525 case V4SImode:
10526 case V4HImode:
10527 case V2SFmode:
10528 case V2SImode:
10529 case V1DImode:
10530 case V2DFmode:
10531 case V2DImode:
10532 case V1TImode:
10533 if (CONSTANT_P (operands[1])
10534 && !easy_vector_constant (operands[1], mode))
10535 operands[1] = force_const_mem (mode, operands[1]);
10536 break;
10538 case SImode:
10539 case DImode:
10540 /* Use the default pattern for addresses of ELF small data. */
10541 if (TARGET_ELF
10542 && mode == Pmode
10543 && DEFAULT_ABI == ABI_V4
10544 && (GET_CODE (operands[1]) == SYMBOL_REF
10545 || GET_CODE (operands[1]) == CONST)
10546 && small_data_operand (operands[1], mode))
10548 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10549 return;
10552 if (DEFAULT_ABI == ABI_V4
10553 && mode == Pmode && mode == SImode
10554 && flag_pic == 1 && got_operand (operands[1], mode))
10556 emit_insn (gen_movsi_got (operands[0], operands[1]));
10557 return;
10560 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10561 && TARGET_NO_TOC
10562 && ! flag_pic
10563 && mode == Pmode
10564 && CONSTANT_P (operands[1])
10565 && GET_CODE (operands[1]) != HIGH
10566 && GET_CODE (operands[1]) != CONST_INT)
10568 rtx target = (!can_create_pseudo_p ()
10569 ? operands[0]
10570 : gen_reg_rtx (mode));
10572 /* If this is a function address on -mcall-aixdesc,
10573 convert it to the address of the descriptor. */
10574 if (DEFAULT_ABI == ABI_AIX
10575 && GET_CODE (operands[1]) == SYMBOL_REF
10576 && XSTR (operands[1], 0)[0] == '.')
10578 const char *name = XSTR (operands[1], 0);
10579 rtx new_ref;
10580 while (*name == '.')
10581 name++;
10582 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10583 CONSTANT_POOL_ADDRESS_P (new_ref)
10584 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10585 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10586 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10587 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10588 operands[1] = new_ref;
10591 if (DEFAULT_ABI == ABI_DARWIN)
10593 #if TARGET_MACHO
10594 if (MACHO_DYNAMIC_NO_PIC_P)
10596 /* Take care of any required data indirection. */
10597 operands[1] = rs6000_machopic_legitimize_pic_address (
10598 operands[1], mode, operands[0]);
10599 if (operands[0] != operands[1])
10600 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10601 return;
10603 #endif
10604 emit_insn (gen_macho_high (target, operands[1]));
10605 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10606 return;
10609 emit_insn (gen_elf_high (target, operands[1]));
10610 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10611 return;
10614 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10615 and we have put it in the TOC, we just need to make a TOC-relative
10616 reference to it. */
10617 if (TARGET_TOC
10618 && GET_CODE (operands[1]) == SYMBOL_REF
10619 && use_toc_relative_ref (operands[1], mode))
10620 operands[1] = create_TOC_reference (operands[1], operands[0]);
10621 else if (mode == Pmode
10622 && CONSTANT_P (operands[1])
10623 && GET_CODE (operands[1]) != HIGH
10624 && ((GET_CODE (operands[1]) != CONST_INT
10625 && ! easy_fp_constant (operands[1], mode))
10626 || (GET_CODE (operands[1]) == CONST_INT
10627 && (num_insns_constant (operands[1], mode)
10628 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10629 || (GET_CODE (operands[0]) == REG
10630 && FP_REGNO_P (REGNO (operands[0]))))
10631 && !toc_relative_expr_p (operands[1], false)
10632 && (TARGET_CMODEL == CMODEL_SMALL
10633 || can_create_pseudo_p ()
10634 || (REG_P (operands[0])
10635 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10638 #if TARGET_MACHO
10639 /* Darwin uses a special PIC legitimizer. */
10640 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10642 operands[1] =
10643 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10644 operands[0]);
10645 if (operands[0] != operands[1])
10646 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10647 return;
10649 #endif
10651 /* If we are to limit the number of things we put in the TOC and
10652 this is a symbol plus a constant we can add in one insn,
10653 just put the symbol in the TOC and add the constant. Don't do
10654 this if reload is in progress. */
10655 if (GET_CODE (operands[1]) == CONST
10656 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10657 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10658 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10659 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10660 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10661 && ! side_effects_p (operands[0]))
10663 rtx sym =
10664 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10665 rtx other = XEXP (XEXP (operands[1], 0), 1);
10667 sym = force_reg (mode, sym);
10668 emit_insn (gen_add3_insn (operands[0], sym, other));
10669 return;
10672 operands[1] = force_const_mem (mode, operands[1]);
10674 if (TARGET_TOC
10675 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10676 && constant_pool_expr_p (XEXP (operands[1], 0))
10677 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10678 get_pool_constant (XEXP (operands[1], 0)),
10679 get_pool_mode (XEXP (operands[1], 0))))
10681 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10682 operands[0]);
10683 operands[1] = gen_const_mem (mode, tocref);
10684 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10687 break;
10689 case TImode:
10690 if (!VECTOR_MEM_VSX_P (TImode))
10691 rs6000_eliminate_indexed_memrefs (operands);
10692 break;
10694 case PTImode:
10695 rs6000_eliminate_indexed_memrefs (operands);
10696 break;
10698 default:
10699 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10702 /* Above, we may have called force_const_mem which may have returned
10703 an invalid address. If we can, fix this up; otherwise, reload will
10704 have to deal with it. */
10705 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10706 operands[1] = validize_mem (operands[1]);
10708 emit_set:
10709 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10712 /* Return true if a structure, union or array containing FIELD should be
10713 accessed using `BLKmode'.
10715 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10716 entire thing in a DI and use subregs to access the internals.
10717 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10718 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10719 best thing to do is set structs to BLKmode and avoid Severe Tire
10720 Damage.
10722 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10723 fit into 1, whereas DI still needs two. */
10725 static bool
10726 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10728 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10729 || (TARGET_E500_DOUBLE && mode == DFmode));
10732 /* Nonzero if we can use a floating-point register to pass this arg. */
10733 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10734 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10735 && (CUM)->fregno <= FP_ARG_MAX_REG \
10736 && TARGET_HARD_FLOAT && TARGET_FPRS)
10738 /* Nonzero if we can use an AltiVec register to pass this arg. */
10739 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10740 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10741 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10742 && TARGET_ALTIVEC_ABI \
10743 && (NAMED))
10745 /* Walk down the type tree of TYPE counting consecutive base elements.
10746 If *MODEP is VOIDmode, then set it to the first valid floating point
10747 or vector type. If a non-floating point or vector type is found, or
10748 if a floating point or vector type that doesn't match a non-VOIDmode
10749 *MODEP is found, then return -1, otherwise return the count in the
10750 sub-tree. */
10752 static int
10753 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10755 machine_mode mode;
10756 HOST_WIDE_INT size;
10758 switch (TREE_CODE (type))
10760 case REAL_TYPE:
10761 mode = TYPE_MODE (type);
10762 if (!SCALAR_FLOAT_MODE_P (mode))
10763 return -1;
10765 if (*modep == VOIDmode)
10766 *modep = mode;
10768 if (*modep == mode)
10769 return 1;
10771 break;
10773 case COMPLEX_TYPE:
10774 mode = TYPE_MODE (TREE_TYPE (type));
10775 if (!SCALAR_FLOAT_MODE_P (mode))
10776 return -1;
10778 if (*modep == VOIDmode)
10779 *modep = mode;
10781 if (*modep == mode)
10782 return 2;
10784 break;
10786 case VECTOR_TYPE:
10787 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10788 return -1;
10790 /* Use V4SImode as representative of all 128-bit vector types. */
10791 size = int_size_in_bytes (type);
10792 switch (size)
10794 case 16:
10795 mode = V4SImode;
10796 break;
10797 default:
10798 return -1;
10801 if (*modep == VOIDmode)
10802 *modep = mode;
10804 /* Vector modes are considered to be opaque: two vectors are
10805 equivalent for the purposes of being homogeneous aggregates
10806 if they are the same size. */
10807 if (*modep == mode)
10808 return 1;
10810 break;
10812 case ARRAY_TYPE:
10814 int count;
10815 tree index = TYPE_DOMAIN (type);
10817 /* Can't handle incomplete types or sizes that are not
10818 fixed. */
10819 if (!COMPLETE_TYPE_P (type)
10820 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10821 return -1;
10823 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10824 if (count == -1
10825 || !index
10826 || !TYPE_MAX_VALUE (index)
10827 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10828 || !TYPE_MIN_VALUE (index)
10829 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10830 || count < 0)
10831 return -1;
10833 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10834 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10836 /* There must be no padding. */
10837 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10838 return -1;
10840 return count;
10843 case RECORD_TYPE:
10845 int count = 0;
10846 int sub_count;
10847 tree field;
10849 /* Can't handle incomplete types or sizes that are not
10850 fixed. */
10851 if (!COMPLETE_TYPE_P (type)
10852 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10853 return -1;
10855 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10857 if (TREE_CODE (field) != FIELD_DECL)
10858 continue;
10860 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10861 if (sub_count < 0)
10862 return -1;
10863 count += sub_count;
10866 /* There must be no padding. */
10867 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10868 return -1;
10870 return count;
10873 case UNION_TYPE:
10874 case QUAL_UNION_TYPE:
10876 /* These aren't very interesting except in a degenerate case. */
10877 int count = 0;
10878 int sub_count;
10879 tree field;
10881 /* Can't handle incomplete types or sizes that are not
10882 fixed. */
10883 if (!COMPLETE_TYPE_P (type)
10884 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10885 return -1;
10887 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10889 if (TREE_CODE (field) != FIELD_DECL)
10890 continue;
10892 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10893 if (sub_count < 0)
10894 return -1;
10895 count = count > sub_count ? count : sub_count;
10898 /* There must be no padding. */
10899 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10900 return -1;
10902 return count;
10905 default:
10906 break;
10909 return -1;
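/* Illustrative examples (assumed): for
     struct { double re, im; }               the walk returns 2, DFmode;
     struct { vector int a; vector int b; }  it returns 2, V4SImode;
   mixing scalar float and vector fields returns -1.  */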
10912 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10913 float or vector aggregate that shall be passed in FP/vector registers
10914 according to the ELFv2 ABI, return the homogeneous element mode in
10915 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10917 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10919 static bool
10920 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10921 machine_mode *elt_mode,
10922 int *n_elts)
10924 /* Note that we do not accept complex types at the top level as
10925 homogeneous aggregates; these types are handled via the
10926 targetm.calls.split_complex_arg mechanism. Complex types
10927 can be elements of homogeneous aggregates, however. */
10928 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10930 machine_mode field_mode = VOIDmode;
10931 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10933 if (field_count > 0)
10935 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10936 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10938 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10939 up to AGGR_ARG_NUM_REG registers. */
10940 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10942 if (elt_mode)
10943 *elt_mode = field_mode;
10944 if (n_elts)
10945 *n_elts = field_count;
10946 return true;
10951 if (elt_mode)
10952 *elt_mode = mode;
10953 if (n_elts)
10954 *n_elts = 1;
10955 return false;
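/* Example (illustrative): struct { double a[4]; } gives field_mode ==
   DFmode, field_count == 4 and n_regs == 1, so it qualifies as a
   homogeneous aggregate provided 4 * 1 <= AGGR_ARG_NUM_REG.  */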
10958 /* Return a nonzero value to say to return the function value in
10959 memory, just as large structures are always returned. TYPE will be
10960 the data type of the value, and FNTYPE will be the type of the
10961 function doing the returning, or @code{NULL} for libcalls.
10963 The AIX ABI for the RS/6000 specifies that all structures are
10964 returned in memory. The Darwin ABI does the same.
10966 For the Darwin 64 Bit ABI, a function result can be returned in
10967 registers or in memory, depending on the size of the return data
10968 type. If it is returned in registers, the value occupies the same
10969 registers as it would if it were the first and only function
10970 argument. Otherwise, the function places its result in memory at
10971 the location pointed to by GPR3.
10973 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10974 but a draft put them in memory, and GCC used to implement the draft
10975 instead of the final standard. Therefore, aix_struct_return
10976 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10977 compatibility can change DRAFT_V4_STRUCT_RET to override the
10978 default, and -m switches get the final word. See
10979 rs6000_option_override_internal for more details.
10981 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10982 long double support is enabled. These values are returned in memory.
10984 int_size_in_bytes returns -1 for variable size objects, which go in
10985 memory always. The cast to unsigned makes -1 > 8. */
10987 static bool
10988 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10990 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10991 if (TARGET_MACHO
10992 && rs6000_darwin64_abi
10993 && TREE_CODE (type) == RECORD_TYPE
10994 && int_size_in_bytes (type) > 0)
10996 CUMULATIVE_ARGS valcum;
10997 rtx valret;
10999 valcum.words = 0;
11000 valcum.fregno = FP_ARG_MIN_REG;
11001 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11002 /* Do a trial code generation as if this were going to be passed
11003 as an argument; if any part goes in memory, we return NULL. */
11004 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11005 if (valret)
11006 return false;
11007 /* Otherwise fall through to more conventional ABI rules. */
11010 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11011 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11012 NULL, NULL))
11013 return false;
11015 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11016 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11017 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11018 return false;
11020 if (AGGREGATE_TYPE_P (type)
11021 && (aix_struct_return
11022 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11023 return true;
11025 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11026 modes only exist for GCC vector types if -maltivec. */
11027 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11028 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11029 return false;
11031 /* Return synthetic vectors in memory. */
11032 if (TREE_CODE (type) == VECTOR_TYPE
11033 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11035 static bool warned_for_return_big_vectors = false;
11036 if (!warned_for_return_big_vectors)
11038 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11039 "non-standard ABI extension with no compatibility guarantee");
11040 warned_for_return_big_vectors = true;
11042 return true;
11045 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11046 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11047 return true;
11049 return false;
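/* Illustrative consequences, assuming ELFv2 defaults: a
   struct { double a, b; } is a homogeneous aggregate and is not returned
   in memory; a struct { char c[16]; } fits the 16-byte window and stays
   in registers; a struct { char c[24]; } exceeds it and goes to memory.
   Under the AIX ABI all three land in memory, since aix_struct_return
   is set.  */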
11052 /* Specify whether values returned in registers should be at the most
11053 significant end of a register. We want aggregates returned by
11054 value to match the way aggregates are passed to functions. */
11056 static bool
11057 rs6000_return_in_msb (const_tree valtype)
11059 return (DEFAULT_ABI == ABI_ELFv2
11060 && BYTES_BIG_ENDIAN
11061 && AGGREGATE_TYPE_P (valtype)
11062 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11065 #ifdef HAVE_AS_GNU_ATTRIBUTE
11066 /* Return TRUE if a call to function FNDECL may be one that
11067 potentially affects the function calling ABI of the object file. */
11069 static bool
11070 call_ABI_of_interest (tree fndecl)
11072 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11074 struct cgraph_node *c_node;
11076 /* Libcalls are always interesting. */
11077 if (fndecl == NULL_TREE)
11078 return true;
11080 /* Any call to an external function is interesting. */
11081 if (DECL_EXTERNAL (fndecl))
11082 return true;
11084 /* Interesting functions that we are emitting in this object file. */
11085 c_node = cgraph_node::get (fndecl);
11086 c_node = c_node->ultimate_alias_target ();
11087 return !c_node->only_called_directly_p ();
11089 return false;
11091 #endif
11093 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11094 for a call to a function whose data type is FNTYPE.
11095 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11097 For incoming args we set the number of arguments in the prototype large
11098 so we never return a PARALLEL. */
11100 void
11101 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11102 rtx libname ATTRIBUTE_UNUSED, int incoming,
11103 int libcall, int n_named_args,
11104 tree fndecl ATTRIBUTE_UNUSED,
11105 machine_mode return_mode ATTRIBUTE_UNUSED)
11107 static CUMULATIVE_ARGS zero_cumulative;
11109 *cum = zero_cumulative;
11110 cum->words = 0;
11111 cum->fregno = FP_ARG_MIN_REG;
11112 cum->vregno = ALTIVEC_ARG_MIN_REG;
11113 cum->prototype = (fntype && prototype_p (fntype));
11114 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11115 ? CALL_LIBCALL : CALL_NORMAL);
11116 cum->sysv_gregno = GP_ARG_MIN_REG;
11117 cum->stdarg = stdarg_p (fntype);
11118 cum->libcall = libcall;
11120 cum->nargs_prototype = 0;
11121 if (incoming || cum->prototype)
11122 cum->nargs_prototype = n_named_args;
11124 /* Check for a longcall attribute. */
11125 if ((!fntype && rs6000_default_long_calls)
11126 || (fntype
11127 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11128 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11129 cum->call_cookie |= CALL_LONG;
11131 if (TARGET_DEBUG_ARG)
11133 fprintf (stderr, "\ninit_cumulative_args:");
11134 if (fntype)
11136 tree ret_type = TREE_TYPE (fntype);
11137 fprintf (stderr, " ret code = %s,",
11138 get_tree_code_name (TREE_CODE (ret_type)));
11141 if (cum->call_cookie & CALL_LONG)
11142 fprintf (stderr, " longcall,");
11144 fprintf (stderr, " proto = %d, nargs = %d\n",
11145 cum->prototype, cum->nargs_prototype);
11148 #ifdef HAVE_AS_GNU_ATTRIBUTE
11149 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11151 cum->escapes = call_ABI_of_interest (fndecl);
11152 if (cum->escapes)
11154 tree return_type;
11156 if (fntype)
11158 return_type = TREE_TYPE (fntype);
11159 return_mode = TYPE_MODE (return_type);
11161 else
11162 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11164 if (return_type != NULL)
11166 if (TREE_CODE (return_type) == RECORD_TYPE
11167 && TYPE_TRANSPARENT_AGGR (return_type))
11169 return_type = TREE_TYPE (first_field (return_type));
11170 return_mode = TYPE_MODE (return_type);
11172 if (AGGREGATE_TYPE_P (return_type)
11173 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11174 <= 8))
11175 rs6000_returns_struct = true;
11177 if (SCALAR_FLOAT_MODE_P (return_mode))
11179 rs6000_passes_float = true;
11180 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11181 && (FLOAT128_IBM_P (return_mode)
11182 || FLOAT128_IEEE_P (return_mode)
11183 || (return_type != NULL
11184 && (TYPE_MAIN_VARIANT (return_type)
11185 == long_double_type_node))))
11186 rs6000_passes_long_double = true;
11188 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11189 || SPE_VECTOR_MODE (return_mode))
11190 rs6000_passes_vector = true;
11193 #endif
11195 if (fntype
11196 && !TARGET_ALTIVEC
11197 && TARGET_ALTIVEC_ABI
11198 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11200 error ("cannot return value in vector register because"
11201 " altivec instructions are disabled, use -maltivec"
11202 " to enable them");
11206 /* The mode the ABI uses for a word. This is not the same as word_mode
11207 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11209 static machine_mode
11210 rs6000_abi_word_mode (void)
11212 return TARGET_32BIT ? SImode : DImode;
11215 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11216 static char *
11217 rs6000_offload_options (void)
11219 if (TARGET_64BIT)
11220 return xstrdup ("-foffload-abi=lp64");
11221 else
11222 return xstrdup ("-foffload-abi=ilp32");
11225 /* On rs6000, function arguments are promoted, as are function return
11226 values. */
11228 static machine_mode
11229 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11230 machine_mode mode,
11231 int *punsignedp ATTRIBUTE_UNUSED,
11232 const_tree, int)
11234 PROMOTE_MODE (mode, *punsignedp, type);
11236 return mode;
11239 /* Return true if TYPE must be passed on the stack and not in registers. */
11241 static bool
11242 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11244 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11245 return must_pass_in_stack_var_size (mode, type);
11246 else
11247 return must_pass_in_stack_var_size_or_pad (mode, type);
11250 static inline bool
11251 is_complex_IBM_long_double (machine_mode mode)
11253 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11256 /* Whether ABI_V4 passes MODE args to a function in floating point
11257 registers. */
11259 static bool
11260 abi_v4_pass_in_fpr (machine_mode mode)
11262 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11263 return false;
11264 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11265 return true;
11266 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11267 return true;
11268 /* ABI_V4 passes complex IBM long double in 8 gprs.
11269 Stupid, but we can't change the ABI now. */
11270 if (is_complex_IBM_long_double (mode))
11271 return false;
11272 if (FLOAT128_2REG_P (mode))
11273 return true;
11274 if (DECIMAL_FLOAT_MODE_P (mode))
11275 return true;
11276 return false;
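/* For example, assuming hard float with FPRs enabled: DFmode (with
   -mdouble-float) and the decimal float modes go in FPRs, and IBM
   extended long double (FLOAT128_2REG_P) takes an FPR pair, but complex
   IBM long double is deliberately rejected here and ends up in eight
   GPRs, as noted above.  */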
11279 /* If defined, a C expression which determines whether, and in which
11280 direction, to pad out an argument with extra space. The value
11281 should be of type `enum direction': either `upward' to pad above
11282 the argument, `downward' to pad below, or `none' to inhibit
11283 padding.
11285 For the AIX ABI structs are always stored left shifted in their
11286 argument slot. */
11288 enum direction
11289 function_arg_padding (machine_mode mode, const_tree type)
11291 #ifndef AGGREGATE_PADDING_FIXED
11292 #define AGGREGATE_PADDING_FIXED 0
11293 #endif
11294 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11295 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11296 #endif
11298 if (!AGGREGATE_PADDING_FIXED)
11300 /* GCC used to pass structures of the same size as integer types as
11301 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11302 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11303 passed padded downward, except that -mstrict-align further
11304 muddied the water in that multi-component structures of 2 and 4
11305 bytes in size were passed padded upward.
11307 The following arranges for best compatibility with previous
11308 versions of gcc, but removes the -mstrict-align dependency. */
11309 if (BYTES_BIG_ENDIAN)
11311 HOST_WIDE_INT size = 0;
11313 if (mode == BLKmode)
11315 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11316 size = int_size_in_bytes (type);
11318 else
11319 size = GET_MODE_SIZE (mode);
11321 if (size == 1 || size == 2 || size == 4)
11322 return downward;
11324 return upward;
11327 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11329 if (type != 0 && AGGREGATE_TYPE_P (type))
11330 return upward;
11333 /* Fall back to the default. */
11334 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
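/* A worked example on a big-endian target without fixed aggregate
   padding: struct { short s; } has size 2 and is padded downward (the
   value sits at the least significant end of its slot), whereas
   struct { char c[3]; } has size 3 and is padded upward.  On
   little-endian targets both fall through to the default.  */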
11337 /* If defined, a C expression that gives the alignment boundary, in bits,
11338 of an argument with the specified mode and type. If it is not defined,
11339 PARM_BOUNDARY is used for all arguments.
11341 V.4 wants long longs and doubles to be double word aligned. Just
11342 testing the mode size is a boneheaded way to do this as it means
11343 that other types such as complex int are also double word aligned.
11344 However, we're stuck with this because changing the ABI might break
11345 existing library interfaces.
11347 Doubleword align SPE vectors.
11348 Quadword align Altivec/VSX vectors.
11349 Quadword align large synthetic vector types. */
11351 static unsigned int
11352 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11354 machine_mode elt_mode;
11355 int n_elts;
11357 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11359 if (DEFAULT_ABI == ABI_V4
11360 && (GET_MODE_SIZE (mode) == 8
11361 || (TARGET_HARD_FLOAT
11362 && TARGET_FPRS
11363 && !is_complex_IBM_long_double (mode)
11364 && FLOAT128_2REG_P (mode))))
11365 return 64;
11366 else if (FLOAT128_VECTOR_P (mode))
11367 return 128;
11368 else if (SPE_VECTOR_MODE (mode)
11369 || (type && TREE_CODE (type) == VECTOR_TYPE
11370 && int_size_in_bytes (type) >= 8
11371 && int_size_in_bytes (type) < 16))
11372 return 64;
11373 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11374 || (type && TREE_CODE (type) == VECTOR_TYPE
11375 && int_size_in_bytes (type) >= 16))
11376 return 128;
11378 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11379 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11380 -mcompat-align-parm is used. */
11381 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11382 || DEFAULT_ABI == ABI_ELFv2)
11383 && type && TYPE_ALIGN (type) > 64)
11385 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11386 or homogeneous float/vector aggregates here. We already handled
11387 vector aggregates above, but still need to check for float here. */
11388 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11389 && !SCALAR_FLOAT_MODE_P (elt_mode));
11391 /* We used to check for BLKmode instead of the above aggregate type
11392 check. Warn when this results in any difference to the ABI. */
11393 if (aggregate_p != (mode == BLKmode))
11395 static bool warned;
11396 if (!warned && warn_psabi)
11398 warned = true;
11399 inform (input_location,
11400 "the ABI of passing aggregates with %d-byte alignment"
11401 " has changed in GCC 5",
11402 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11406 if (aggregate_p)
11407 return 128;
11410 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11411 implement the "aggregate type" check as a BLKmode check here; this
11412 means certain aggregate types are in fact not aligned. */
11413 if (TARGET_MACHO && rs6000_darwin64_abi
11414 && mode == BLKmode
11415 && type && TYPE_ALIGN (type) > 64)
11416 return 128;
11418 return PARM_BOUNDARY;
11421 /* The offset in words to the start of the parameter save area. */
11423 static unsigned int
11424 rs6000_parm_offset (void)
11426 return (DEFAULT_ABI == ABI_V4 ? 2
11427 : DEFAULT_ABI == ABI_ELFv2 ? 4
11428 : 6);
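/* E.g. on 64-bit AIX that is 6 doublewords (48 bytes: back chain, CR
   save, LR save, two reserved words, TOC save); ELFv2 trims the header
   to 4 doublewords (32 bytes); ABI_V4, a 32-bit ABI, keeps only the
   2-word back chain/LR header (8 bytes).  The byte breakdown is
   illustrative; callers only need the word counts.  */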
11431 /* For a function parm of MODE and TYPE, return the starting word in
11432 the parameter area. NWORDS of the parameter area are already used. */
11434 static unsigned int
11435 rs6000_parm_start (machine_mode mode, const_tree type,
11436 unsigned int nwords)
11438 unsigned int align;
11440 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11441 return nwords + (-(rs6000_parm_offset () + nwords) & align);
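/* Worked example (ELFv2, 64-bit): a quadword-aligned argument has a
   128-bit boundary, so ALIGN is 128/64 - 1 = 1.  With NWORDS = 3 words
   already used, the result is 3 + (-(4 + 3) & 1) = 4; the absolute
   word 4 + 4 = 8 of the parameter area is even and hence 16-byte
   aligned.  */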
11444 /* Compute the size (in words) of a function argument. */
11446 static unsigned long
11447 rs6000_arg_size (machine_mode mode, const_tree type)
11449 unsigned long size;
11451 if (mode != BLKmode)
11452 size = GET_MODE_SIZE (mode);
11453 else
11454 size = int_size_in_bytes (type);
11456 if (TARGET_32BIT)
11457 return (size + 3) >> 2;
11458 else
11459 return (size + 7) >> 3;
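/* E.g. a BLKmode struct of 12 bytes takes (12 + 3) >> 2 = 3 words under
   TARGET_32BIT but (12 + 7) >> 3 = 2 doublewords in 64-bit mode; a
   DImode argument takes 2 words or 1 word respectively.  */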
11462 /* Use this to flush pending int fields. */
11464 static void
11465 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11466 HOST_WIDE_INT bitpos, int final)
11468 unsigned int startbit, endbit;
11469 int intregs, intoffset;
11470 machine_mode mode;
11472 /* Handle the situations where a float is taking up the first half
11473 of the GPR, and the other half is empty (typically due to
11474 alignment restrictions). We can detect this by an 8-byte-aligned
11475 int field, or by seeing that this is the final flush for this
11476 argument. Count the word and continue on. */
11477 if (cum->floats_in_gpr == 1
11478 && (cum->intoffset % 64 == 0
11479 || (cum->intoffset == -1 && final)))
11481 cum->words++;
11482 cum->floats_in_gpr = 0;
11485 if (cum->intoffset == -1)
11486 return;
11488 intoffset = cum->intoffset;
11489 cum->intoffset = -1;
11490 cum->floats_in_gpr = 0;
11492 if (intoffset % BITS_PER_WORD != 0)
11494 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11495 MODE_INT, 0);
11496 if (mode == BLKmode)
11498 /* We couldn't find an appropriate mode, which happens,
11499 e.g., in packed structs when there are 3 bytes to load.
11500 Force intoffset back to the beginning of the word in this
11501 case. */
11502 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11506 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11507 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11508 intregs = (endbit - startbit) / BITS_PER_WORD;
11509 cum->words += intregs;
11510 /* words should be unsigned. */
11511 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11513 int pad = (endbit/BITS_PER_WORD) - cum->words;
11514 cum->words += pad;
11518 /* The darwin64 ABI calls for us to recurse down through structs,
11519 looking for elements passed in registers. Unfortunately, we have
11520 to track int register count here also because of misalignments
11521 in powerpc alignment mode. */
11523 static void
11524 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11525 const_tree type,
11526 HOST_WIDE_INT startbitpos)
11528 tree f;
11530 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11531 if (TREE_CODE (f) == FIELD_DECL)
11533 HOST_WIDE_INT bitpos = startbitpos;
11534 tree ftype = TREE_TYPE (f);
11535 machine_mode mode;
11536 if (ftype == error_mark_node)
11537 continue;
11538 mode = TYPE_MODE (ftype);
11540 if (DECL_SIZE (f) != 0
11541 && tree_fits_uhwi_p (bit_position (f)))
11542 bitpos += int_bit_position (f);
11544 /* ??? FIXME: else assume zero offset. */
11546 if (TREE_CODE (ftype) == RECORD_TYPE)
11547 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11548 else if (USE_FP_FOR_ARG_P (cum, mode))
11550 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11551 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11552 cum->fregno += n_fpregs;
11553 /* Single-precision floats present a special problem for
11554 us, because they are smaller than an 8-byte GPR, and so
11555 the structure-packing rules combined with the standard
11556 varargs behavior mean that we want to pack float/float
11557 and float/int combinations into a single register's
11558 space. This is complicated by the arg advance flushing,
11559 which works on arbitrarily large groups of int-type
11560 fields. */
11561 if (mode == SFmode)
11563 if (cum->floats_in_gpr == 1)
11565 /* Two floats in a word; count the word and reset
11566 the float count. */
11567 cum->words++;
11568 cum->floats_in_gpr = 0;
11570 else if (bitpos % 64 == 0)
11572 /* A float at the beginning of an 8-byte word;
11573 count it and put off adjusting cum->words until
11574 we see if an arg advance flush is going to do it
11575 for us. */
11576 cum->floats_in_gpr++;
11578 else
11580 /* The float is at the end of a word, preceded
11581 by integer fields, so the arg advance flush
11582 just above has already set cum->words and
11583 everything is taken care of. */
11586 else
11587 cum->words += n_fpregs;
11589 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11591 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11592 cum->vregno++;
11593 cum->words += 2;
11595 else if (cum->intoffset == -1)
11596 cum->intoffset = bitpos;
11600 /* Check for an item that needs to be considered specially under the Darwin
11601 64-bit ABI. These are record types where the mode is BLKmode or the
11602 structure is 8 bytes in size. */
11603 static int
11604 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11606 return rs6000_darwin64_abi
11607 && ((mode == BLKmode
11608 && TREE_CODE (type) == RECORD_TYPE
11609 && int_size_in_bytes (type) > 0)
11610 || (type && TREE_CODE (type) == RECORD_TYPE
11611 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11614 /* Update the data in CUM to advance over an argument
11615 of mode MODE and data type TYPE.
11616 (TYPE is null for libcalls where that information may not be available.)
11618 Note that for args passed by reference, function_arg will be called
11619 with MODE and TYPE set to that of the pointer to the arg, not the arg
11620 itself. */
11622 static void
11623 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11624 const_tree type, bool named, int depth)
11626 machine_mode elt_mode;
11627 int n_elts;
11629 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11631 /* Only tick off an argument if we're not recursing. */
11632 if (depth == 0)
11633 cum->nargs_prototype--;
11635 #ifdef HAVE_AS_GNU_ATTRIBUTE
11636 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11637 && cum->escapes)
11639 if (SCALAR_FLOAT_MODE_P (mode))
11641 rs6000_passes_float = true;
11642 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11643 && (FLOAT128_IBM_P (mode)
11644 || FLOAT128_IEEE_P (mode)
11645 || (type != NULL
11646 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11647 rs6000_passes_long_double = true;
11649 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11650 || (SPE_VECTOR_MODE (mode)
11651 && !cum->stdarg
11652 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11653 rs6000_passes_vector = true;
11655 #endif
11657 if (TARGET_ALTIVEC_ABI
11658 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11659 || (type && TREE_CODE (type) == VECTOR_TYPE
11660 && int_size_in_bytes (type) == 16)))
11662 bool stack = false;
11664 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11666 cum->vregno += n_elts;
11668 if (!TARGET_ALTIVEC)
11669 error ("cannot pass argument in vector register because"
11670 " altivec instructions are disabled, use -maltivec"
11671 " to enable them");
11673 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11674 even if it is going to be passed in a vector register.
11675 Darwin does the same for variable-argument functions. */
11676 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11677 && TARGET_64BIT)
11678 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11679 stack = true;
11681 else
11682 stack = true;
11684 if (stack)
11686 int align;
11688 /* Vector parameters must be 16-byte aligned. In 32-bit
11689 mode this means we need to take into account the offset
11690 to the parameter save area. In 64-bit mode, they just
11691 have to start on an even word, since the parameter save
11692 area is 16-byte aligned. */
11693 if (TARGET_32BIT)
11694 align = -(rs6000_parm_offset () + cum->words) & 3;
11695 else
11696 align = cum->words & 1;
11697 cum->words += align + rs6000_arg_size (mode, type);
11699 if (TARGET_DEBUG_ARG)
11701 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11702 cum->words, align);
11703 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11704 cum->nargs_prototype, cum->prototype,
11705 GET_MODE_NAME (mode));
11709 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11710 && !cum->stdarg
11711 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11712 cum->sysv_gregno++;
11714 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11716 int size = int_size_in_bytes (type);
11717 /* Variable sized types have size == -1 and are
11718 treated as if consisting entirely of ints.
11719 Pad to 16 byte boundary if needed. */
11720 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11721 && (cum->words % 2) != 0)
11722 cum->words++;
11723 /* For varargs, we can just go up by the size of the struct. */
11724 if (!named)
11725 cum->words += (size + 7) / 8;
11726 else
11728 /* It is tempting to say int register count just goes up by
11729 sizeof(type)/8, but this is wrong in a case such as
11730 { int; double; int; } [powerpc alignment]. We have to
11731 grovel through the fields for these too. */
11732 cum->intoffset = 0;
11733 cum->floats_in_gpr = 0;
11734 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11735 rs6000_darwin64_record_arg_advance_flush (cum,
11736 size * BITS_PER_UNIT, 1);
11738 if (TARGET_DEBUG_ARG)
11740 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11741 cum->words, TYPE_ALIGN (type), size);
11742 fprintf (stderr,
11743 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11744 cum->nargs_prototype, cum->prototype,
11745 GET_MODE_NAME (mode));
11748 else if (DEFAULT_ABI == ABI_V4)
11750 if (abi_v4_pass_in_fpr (mode))
11752 /* _Decimal128 must use an even/odd register pair. This assumes
11753 that the register number is odd when fregno is odd. */
11754 if (mode == TDmode && (cum->fregno % 2) == 1)
11755 cum->fregno++;
11757 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11758 <= FP_ARG_V4_MAX_REG)
11759 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11760 else
11762 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11763 if (mode == DFmode || FLOAT128_IBM_P (mode)
11764 || mode == DDmode || mode == TDmode)
11765 cum->words += cum->words & 1;
11766 cum->words += rs6000_arg_size (mode, type);
11769 else
11771 int n_words = rs6000_arg_size (mode, type);
11772 int gregno = cum->sysv_gregno;
11774 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11775 (r7,r8) or (r9,r10). As does any other 2 word item such
11776 as complex int due to a historical mistake. */
11777 if (n_words == 2)
11778 gregno += (1 - gregno) & 1;
11780 /* Multi-reg args are not split between registers and stack. */
11781 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11783 /* Long long and SPE vectors are aligned on the stack.
11784 So are other 2 word items such as complex int due to
11785 a historical mistake. */
11786 if (n_words == 2)
11787 cum->words += cum->words & 1;
11788 cum->words += n_words;
11791 /* Note: we continue to accumulate gregno even after we have started
11792 spilling to the stack, which tells expand_builtin_saveregs
11793 that spilling has begun. */
11794 cum->sysv_gregno = gregno + n_words;
11797 if (TARGET_DEBUG_ARG)
11799 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11800 cum->words, cum->fregno);
11801 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11802 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11803 fprintf (stderr, "mode = %4s, named = %d\n",
11804 GET_MODE_NAME (mode), named);
11807 else
11809 int n_words = rs6000_arg_size (mode, type);
11810 int start_words = cum->words;
11811 int align_words = rs6000_parm_start (mode, type, start_words);
11813 cum->words = align_words + n_words;
11815 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11817 /* _Decimal128 must be passed in an even/odd float register pair.
11818 This assumes that the register number is odd when fregno is
11819 odd. */
11820 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11821 cum->fregno++;
11822 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11825 if (TARGET_DEBUG_ARG)
11827 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11828 cum->words, cum->fregno);
11829 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11830 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11831 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11832 named, align_words - start_words, depth);
11837 static void
11838 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11839 const_tree type, bool named)
11841 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11845 static rtx
11846 spe_build_register_parallel (machine_mode mode, int gregno)
11848 rtx r1, r3, r5, r7;
11850 switch (mode)
11852 case DFmode:
11853 r1 = gen_rtx_REG (DImode, gregno);
11854 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11855 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11857 case DCmode:
11858 case TFmode:
11859 r1 = gen_rtx_REG (DImode, gregno);
11860 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11861 r3 = gen_rtx_REG (DImode, gregno + 2);
11862 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11863 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11865 case TCmode:
11866 r1 = gen_rtx_REG (DImode, gregno);
11867 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11868 r3 = gen_rtx_REG (DImode, gregno + 2);
11869 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11870 r5 = gen_rtx_REG (DImode, gregno + 4);
11871 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11872 r7 = gen_rtx_REG (DImode, gregno + 6);
11873 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11874 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11876 default:
11877 gcc_unreachable ();
11881 /* Determine where to put a SIMD argument on the SPE. */
11882 static rtx
11883 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11884 const_tree type)
11886 int gregno = cum->sysv_gregno;
11888 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11889 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11890 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11891 || mode == DCmode || mode == TCmode))
11893 int n_words = rs6000_arg_size (mode, type);
11895 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11896 if (mode == DFmode)
11897 gregno += (1 - gregno) & 1;
11899 /* Multi-reg args are not split between registers and stack. */
11900 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11901 return NULL_RTX;
11903 return spe_build_register_parallel (mode, gregno);
11905 if (cum->stdarg)
11907 int n_words = rs6000_arg_size (mode, type);
11909 /* SPE vectors are put in odd registers. */
11910 if (n_words == 2 && (gregno & 1) == 0)
11911 gregno += 1;
11913 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11915 rtx r1, r2;
11916 machine_mode m = SImode;
11918 r1 = gen_rtx_REG (m, gregno);
11919 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11920 r2 = gen_rtx_REG (m, gregno + 1);
11921 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11922 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11924 else
11925 return NULL_RTX;
11927 else
11929 if (gregno <= GP_ARG_MAX_REG)
11930 return gen_rtx_REG (mode, gregno);
11931 else
11932 return NULL_RTX;
11936 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11937 structure between cum->intoffset and bitpos to integer registers. */
11939 static void
11940 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11941 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11943 machine_mode mode;
11944 unsigned int regno;
11945 unsigned int startbit, endbit;
11946 int this_regno, intregs, intoffset;
11947 rtx reg;
11949 if (cum->intoffset == -1)
11950 return;
11952 intoffset = cum->intoffset;
11953 cum->intoffset = -1;
11955 /* If this is the trailing part of a word, try to only load that
11956 much into the register. Otherwise load the whole register. Note
11957 that in the latter case we may pick up unwanted bits. It's not a
11958 problem at the moment, but we may wish to revisit this. */
11960 if (intoffset % BITS_PER_WORD != 0)
11962 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11963 MODE_INT, 0);
11964 if (mode == BLKmode)
11966 /* We couldn't find an appropriate mode, which happens,
11967 e.g., in packed structs when there are 3 bytes to load.
11968 Force intoffset back to the beginning of the word in this
11969 case. */
11970 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11971 mode = word_mode;
11974 else
11975 mode = word_mode;
11977 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11978 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11979 intregs = (endbit - startbit) / BITS_PER_WORD;
11980 this_regno = cum->words + intoffset / BITS_PER_WORD;
11982 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11983 cum->use_stack = 1;
11985 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11986 if (intregs <= 0)
11987 return;
11989 intoffset /= BITS_PER_UNIT;
11992 regno = GP_ARG_MIN_REG + this_regno;
11993 reg = gen_rtx_REG (mode, regno);
11994 rvec[(*k)++] =
11995 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11997 this_regno += 1;
11998 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11999 mode = word_mode;
12000 intregs -= 1;
12002 while (intregs > 0);
12005 /* Recursive workhorse for the following. */
12007 static void
12008 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12009 HOST_WIDE_INT startbitpos, rtx rvec[],
12010 int *k)
12012 tree f;
12014 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12015 if (TREE_CODE (f) == FIELD_DECL)
12017 HOST_WIDE_INT bitpos = startbitpos;
12018 tree ftype = TREE_TYPE (f);
12019 machine_mode mode;
12020 if (ftype == error_mark_node)
12021 continue;
12022 mode = TYPE_MODE (ftype);
12024 if (DECL_SIZE (f) != 0
12025 && tree_fits_uhwi_p (bit_position (f)))
12026 bitpos += int_bit_position (f);
12028 /* ??? FIXME: else assume zero offset. */
12030 if (TREE_CODE (ftype) == RECORD_TYPE)
12031 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12032 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12034 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12035 #if 0
12036 switch (mode)
12038 case SCmode: mode = SFmode; break;
12039 case DCmode: mode = DFmode; break;
12040 case TCmode: mode = TFmode; break;
12041 default: break;
12043 #endif
12044 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12045 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12047 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12048 && (mode == TFmode || mode == TDmode));
12049 /* Long double or _Decimal128 split over regs and memory. */
12050 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12051 cum->use_stack = 1;
12053 rvec[(*k)++]
12054 = gen_rtx_EXPR_LIST (VOIDmode,
12055 gen_rtx_REG (mode, cum->fregno++),
12056 GEN_INT (bitpos / BITS_PER_UNIT));
12057 if (FLOAT128_2REG_P (mode))
12058 cum->fregno++;
12060 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12062 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12063 rvec[(*k)++]
12064 = gen_rtx_EXPR_LIST (VOIDmode,
12065 gen_rtx_REG (mode, cum->vregno++),
12066 GEN_INT (bitpos / BITS_PER_UNIT));
12068 else if (cum->intoffset == -1)
12069 cum->intoffset = bitpos;
12073 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12074 the register(s) to be used for each field and subfield of a struct
12075 being passed by value, along with the offset of where the
12076 register's value may be found in the block. FP fields go in FP
12077 register, vector fields go in vector registers, and everything
12078 else goes in int registers, packed as in memory.
12080 This code is also used for function return values. RETVAL indicates
12081 whether this is the case.
12083 Much of this is taken from the SPARC V9 port, which has a similar
12084 calling convention. */
12086 static rtx
12087 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12088 bool named, bool retval)
12090 rtx rvec[FIRST_PSEUDO_REGISTER];
12091 int k = 1, kbase = 1;
12092 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12093 /* This is a copy; modifications are not visible to our caller. */
12094 CUMULATIVE_ARGS copy_cum = *orig_cum;
12095 CUMULATIVE_ARGS *cum = &copy_cum;
12097 /* Pad to 16 byte boundary if needed. */
12098 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12099 && (cum->words % 2) != 0)
12100 cum->words++;
12102 cum->intoffset = 0;
12103 cum->use_stack = 0;
12104 cum->named = named;
12106 /* Put entries into rvec[] for individual FP and vector fields, and
12107 for the chunks of memory that go in int regs. Note we start at
12108 element 1; 0 is reserved for an indication of using memory, and
12109 may or may not be filled in below. */
12110 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12111 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12113 /* If any part of the struct went on the stack put all of it there.
12114 This hack is because the generic code for
12115 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12116 parts of the struct are not at the beginning. */
12117 if (cum->use_stack)
12119 if (retval)
12120 return NULL_RTX; /* doesn't go in registers at all */
12121 kbase = 0;
12122 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12124 if (k > 1 || cum->use_stack)
12125 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12126 else
12127 return NULL_RTX;
12130 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12132 static rtx
12133 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12134 int align_words)
12136 int n_units;
12137 int i, k;
12138 rtx rvec[GP_ARG_NUM_REG + 1];
12140 if (align_words >= GP_ARG_NUM_REG)
12141 return NULL_RTX;
12143 n_units = rs6000_arg_size (mode, type);
12145 /* Optimize the simple case where the arg fits in one gpr, except in
12146 the case of BLKmode due to assign_parms assuming that registers are
12147 BITS_PER_WORD wide. */
12148 if (n_units == 0
12149 || (n_units == 1 && mode != BLKmode))
12150 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12152 k = 0;
12153 if (align_words + n_units > GP_ARG_NUM_REG)
12154 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12155 using a magic NULL_RTX component.
12156 This is not strictly correct. Only some of the arg belongs in
12157 memory, not all of it. However, the normal scheme using
12158 function_arg_partial_nregs can result in unusual subregs, e.g.
12159 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12160 store the whole arg to memory is often more efficient than code
12161 to store pieces, and we know that space is available in the right
12162 place for the whole arg. */
12163 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12165 i = 0;
12168 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12169 rtx off = GEN_INT (i++ * 4);
12170 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12172 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12174 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
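/* For instance, a DFmode argument with align_words == 6 under
   -m32 -mpowerpc64 produces (sketch, register numbers hypothetical):

     (parallel:DF [(expr_list (reg:SI 9) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 4))])

   i.e. the two SImode halves in r9/r10 with their byte offsets.  Had the
   argument started at word 7, rvec would begin with the magic NULL_RTX
   element saying that the argument also occupies memory.  */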
12177 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12178 but must also be copied into the parameter save area starting at
12179 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12180 to the GPRs and/or memory. Return the number of elements used. */
12182 static int
12183 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12184 int align_words, rtx *rvec)
12186 int k = 0;
12188 if (align_words < GP_ARG_NUM_REG)
12190 int n_words = rs6000_arg_size (mode, type);
12192 if (align_words + n_words > GP_ARG_NUM_REG
12193 || mode == BLKmode
12194 || (TARGET_32BIT && TARGET_POWERPC64))
12196 /* If this is partially on the stack, then we only
12197 include the portion actually in registers here. */
12198 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12199 int i = 0;
12201 if (align_words + n_words > GP_ARG_NUM_REG)
12203 /* Not all of the arg fits in gprs. Say that it goes in memory
12204 too, using a magic NULL_RTX component. Also see comment in
12205 rs6000_mixed_function_arg for why the normal
12206 function_arg_partial_nregs scheme doesn't work in this case. */
12207 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12212 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12213 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12214 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12216 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12218 else
12220 /* The whole arg fits in gprs. */
12221 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12222 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12225 else
12227 /* It's entirely in memory. */
12228 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12231 return k;
12234 /* RVEC is a vector of K components of an argument of mode MODE.
12235 Construct the final function_arg return value from it. */
12237 static rtx
12238 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12240 gcc_assert (k >= 1);
12242 /* Avoid returning a PARALLEL in the trivial cases. */
12243 if (k == 1)
12245 if (XEXP (rvec[0], 0) == NULL_RTX)
12246 return NULL_RTX;
12248 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12249 return XEXP (rvec[0], 0);
12252 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12255 /* Determine where to put an argument to a function.
12256 Value is zero to push the argument on the stack,
12257 or a hard register in which to store the argument.
12259 MODE is the argument's machine mode.
12260 TYPE is the data type of the argument (as a tree).
12261 This is null for libcalls where that information may
12262 not be available.
12263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12264 the preceding args and about the function being called. It is
12265 not modified in this routine.
12266 NAMED is nonzero if this argument is a named parameter
12267 (otherwise it is an extra parameter matching an ellipsis).
12269 On RS/6000 the first eight words of non-FP arguments are normally in registers
12270 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12271 Under V.4, the first 8 FP args are in registers.
12273 If this is floating-point and no prototype is specified, we use
12274 both an FP and integer register (or possibly FP reg and stack). Library
12275 functions (when CALL_LIBCALL is set) always have the proper types for args,
12276 so we can pass the FP value just in one register. emit_library_call
12277 doesn't support PARALLEL anyway.
12279 Note that for args passed by reference, function_arg will be called
12280 with MODE and TYPE set to that of the pointer to the arg, not the arg
12281 itself. */
12283 static rtx
12284 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12285 const_tree type, bool named)
12287 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12288 enum rs6000_abi abi = DEFAULT_ABI;
12289 machine_mode elt_mode;
12290 int n_elts;
12292 /* Return a marker to indicate whether CR1 needs to set or clear the
12293 bit that V.4 uses to say fp args were passed in registers.
12294 Assume that we don't need the marker for software floating point,
12295 or compiler generated library calls. */
12296 if (mode == VOIDmode)
12298 if (abi == ABI_V4
12299 && (cum->call_cookie & CALL_LIBCALL) == 0
12300 && (cum->stdarg
12301 || (cum->nargs_prototype < 0
12302 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12304 /* For the SPE, we need to crxor CR6 always. */
12305 if (TARGET_SPE_ABI)
12306 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12307 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12308 return GEN_INT (cum->call_cookie
12309 | ((cum->fregno == FP_ARG_MIN_REG)
12310 ? CALL_V4_SET_FP_ARGS
12311 : CALL_V4_CLEAR_FP_ARGS));
12314 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12317 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12319 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12321 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12322 if (rslt != NULL_RTX)
12323 return rslt;
12324 /* Else fall through to usual handling. */
12327 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12329 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12330 rtx r, off;
12331 int i, k = 0;
12333 /* Do we also need to pass this argument in the parameter save area?
12334 Library support functions for IEEE 128-bit are assumed to not need the
12335 value passed both in GPRs and in vector registers. */
12336 if (TARGET_64BIT && !cum->prototype
12337 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12339 int align_words = ROUND_UP (cum->words, 2);
12340 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12343 /* Describe where this argument goes in the vector registers. */
12344 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12346 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12347 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12348 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12351 return rs6000_finish_function_arg (mode, rvec, k);
12353 else if (TARGET_ALTIVEC_ABI
12354 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12355 || (type && TREE_CODE (type) == VECTOR_TYPE
12356 && int_size_in_bytes (type) == 16)))
12358 if (named || abi == ABI_V4)
12359 return NULL_RTX;
12360 else
12362 /* Vector parameters to varargs functions under AIX or Darwin
12363 get passed in memory and possibly also in GPRs. */
12364 int align, align_words, n_words;
12365 machine_mode part_mode;
12367 /* Vector parameters must be 16-byte aligned. In 32-bit
12368 mode this means we need to take into account the offset
12369 to the parameter save area. In 64-bit mode, they just
12370 have to start on an even word, since the parameter save
12371 area is 16-byte aligned. */
12372 if (TARGET_32BIT)
12373 align = -(rs6000_parm_offset () + cum->words) & 3;
12374 else
12375 align = cum->words & 1;
12376 align_words = cum->words + align;
12378 /* Out of registers? Memory, then. */
12379 if (align_words >= GP_ARG_NUM_REG)
12380 return NULL_RTX;
12382 if (TARGET_32BIT && TARGET_POWERPC64)
12383 return rs6000_mixed_function_arg (mode, type, align_words);
12385 /* The vector value goes in GPRs. Only the part of the
12386 value in GPRs is reported here. */
12387 part_mode = mode;
12388 n_words = rs6000_arg_size (mode, type);
12389 if (align_words + n_words > GP_ARG_NUM_REG)
12390 /* Fortunately, there are only two possibilities, the value
12391 is either wholly in GPRs or half in GPRs and half not. */
12392 part_mode = DImode;
12394 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12397 else if (TARGET_SPE_ABI && TARGET_SPE
12398 && (SPE_VECTOR_MODE (mode)
12399 || (TARGET_E500_DOUBLE && (mode == DFmode
12400 || mode == DCmode
12401 || mode == TFmode
12402 || mode == TCmode))))
12403 return rs6000_spe_function_arg (cum, mode, type);
12405 else if (abi == ABI_V4)
12407 if (abi_v4_pass_in_fpr (mode))
12409 /* _Decimal128 must use an even/odd register pair. This assumes
12410 that the register number is odd when fregno is odd. */
12411 if (mode == TDmode && (cum->fregno % 2) == 1)
12412 cum->fregno++;
12414 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12415 <= FP_ARG_V4_MAX_REG)
12416 return gen_rtx_REG (mode, cum->fregno);
12417 else
12418 return NULL_RTX;
12420 else
12422 int n_words = rs6000_arg_size (mode, type);
12423 int gregno = cum->sysv_gregno;
12425 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12426 (r7,r8) or (r9,r10). As does any other 2 word item such
12427 as complex int due to a historical mistake. */
12428 if (n_words == 2)
12429 gregno += (1 - gregno) & 1;
12431 /* Multi-reg args are not split between registers and stack. */
12432 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12433 return NULL_RTX;
12435 if (TARGET_32BIT && TARGET_POWERPC64)
12436 return rs6000_mixed_function_arg (mode, type,
12437 gregno - GP_ARG_MIN_REG);
12438 return gen_rtx_REG (mode, gregno);
12441 else
12443 int align_words = rs6000_parm_start (mode, type, cum->words);
12445 /* _Decimal128 must be passed in an even/odd float register pair.
12446 This assumes that the register number is odd when fregno is odd. */
12447 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12448 cum->fregno++;
12450 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12452 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12453 rtx r, off;
12454 int i, k = 0;
12455 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12456 int fpr_words;
12458 /* Do we also need to pass this argument in the parameter
12459 save area? */
12460 if (type && (cum->nargs_prototype <= 0
12461 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12462 && TARGET_XL_COMPAT
12463 && align_words >= GP_ARG_NUM_REG)))
12464 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12466 /* Describe where this argument goes in the fprs. */
12467 for (i = 0; i < n_elts
12468 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12470 /* Check if the argument is split over registers and memory.
12471 This can only ever happen for long double or _Decimal128;
12472 complex types are handled via split_complex_arg. */
12473 machine_mode fmode = elt_mode;
12474 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12476 gcc_assert (FLOAT128_2REG_P (fmode));
12477 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12480 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12481 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12482 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12485 /* If there were not enough FPRs to hold the argument, the rest
12486 usually goes into memory. However, if the current position
12487 is still within the register parameter area, a portion may
12488 actually have to go into GPRs.
12490 Note that it may happen that the portion of the argument
12491 passed in the first "half" of the first GPR was already
12492 passed in the last FPR as well.
12494 For unnamed arguments, we already set up GPRs to cover the
12495 whole argument in rs6000_psave_function_arg, so there is
12496 nothing further to do at this point. */
12497 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12498 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12499 && cum->nargs_prototype > 0)
12501 static bool warned;
12503 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12504 int n_words = rs6000_arg_size (mode, type);
12506 align_words += fpr_words;
12507 n_words -= fpr_words;
12511 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12512 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12513 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12515 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12517 if (!warned && warn_psabi)
12519 warned = true;
12520 inform (input_location,
12521 "the ABI of passing homogeneous float aggregates"
12522 " has changed in GCC 5");
12526 return rs6000_finish_function_arg (mode, rvec, k);
12528 else if (align_words < GP_ARG_NUM_REG)
12530 if (TARGET_32BIT && TARGET_POWERPC64)
12531 return rs6000_mixed_function_arg (mode, type, align_words);
12533 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12535 else
12536 return NULL_RTX;
12540 /* For an arg passed partly in registers and partly in memory, this is
12541 the number of bytes passed in registers. For args passed entirely in
12542 registers or entirely in memory, zero. When an arg is described by a
12543 PARALLEL, perhaps using more than one register type, this function
12544 returns the number of bytes used by the first element of the PARALLEL. */
12546 static int
12547 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12548 tree type, bool named)
12550 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12551 bool passed_in_gprs = true;
12552 int ret = 0;
12553 int align_words;
12554 machine_mode elt_mode;
12555 int n_elts;
12557 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12559 if (DEFAULT_ABI == ABI_V4)
12560 return 0;
12562 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12564 /* If we are passing this arg in the fixed parameter save area (gprs or
12565 memory) as well as VRs, we do not use the partial bytes mechanism;
12566 instead, rs6000_function_arg will return a PARALLEL including a memory
12567 element as necessary. Library support functions for IEEE 128-bit are
12568 assumed to not need the value passed both in GPRs and in vector
12569 registers. */
12570 if (TARGET_64BIT && !cum->prototype
12571 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12572 return 0;
12574 /* Otherwise, we pass in VRs only. Check for partial copies. */
12575 passed_in_gprs = false;
12576 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12577 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12580 /* In this complicated case we just disable the partial_nregs code. */
12581 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12582 return 0;
12584 align_words = rs6000_parm_start (mode, type, cum->words);
12586 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12588 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12590 /* If we are passing this arg in the fixed parameter save area
12591 (gprs or memory) as well as FPRs, we do not use the partial
12592 bytes mechanism; instead, rs6000_function_arg will return a
12593 PARALLEL including a memory element as necessary. */
12594 if (type
12595 && (cum->nargs_prototype <= 0
12596 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12597 && TARGET_XL_COMPAT
12598 && align_words >= GP_ARG_NUM_REG)))
12599 return 0;
12601 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12602 passed_in_gprs = false;
12603 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12605 /* Compute number of bytes / words passed in FPRs. If there
12606 is still space available in the register parameter area
12607 *after* that amount, a part of the argument will be passed
12608 in GPRs. In that case, the total amount passed in any
12609 registers is equal to the amount that would have been passed
12610 in GPRs if everything were passed there, so we fall back to
12611 the GPR code below to compute the appropriate value. */
12612 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12613 * MIN (8, GET_MODE_SIZE (elt_mode)));
12614 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12616 if (align_words + fpr_words < GP_ARG_NUM_REG)
12617 passed_in_gprs = true;
12618 else
12619 ret = fpr;
12623 if (passed_in_gprs
12624 && align_words < GP_ARG_NUM_REG
12625 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12626 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12628 if (ret != 0 && TARGET_DEBUG_ARG)
12629 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12631 return ret;
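/* Worked example (64-bit, AIX or ELFv2, named arg with a prototype): an
   IBM long double (TFmode) arriving with cum->fregno == FP_ARG_MAX_REG
   has only one FPR left, so fpr = 1 * MIN (8, 16) = 8 bytes.  If the GPR
   part of the parameter area is already exhausted, the function returns
   8; otherwise it falls back to the GPR computation above.  Illustrative
   numbers only.  */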
12634 /* A C expression that indicates when an argument must be passed by
12635 reference. If nonzero for an argument, a copy of that argument is
12636 made in memory and a pointer to the argument is passed instead of
12637 the argument itself. The pointer is passed in whatever way is
12638 appropriate for passing a pointer to that type.
12640 Under V.4, aggregates and long double are passed by reference.
12642 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12643 reference unless the AltiVec vector extension ABI is in force.
12645 As an extension to all ABIs, variable sized types are passed by
12646 reference. */
12648 static bool
12649 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12650 machine_mode mode, const_tree type,
12651 bool named ATTRIBUTE_UNUSED)
12653 if (!type)
12654 return 0;
12656 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12657 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12659 if (TARGET_DEBUG_ARG)
12660 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12661 return 1;
12664 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12666 if (TARGET_DEBUG_ARG)
12667 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12668 return 1;
12671 if (int_size_in_bytes (type) < 0)
12673 if (TARGET_DEBUG_ARG)
12674 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12675 return 1;
12678 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12679 modes only exist for GCC vector types if -maltivec. */
12680 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12682 if (TARGET_DEBUG_ARG)
12683 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12684 return 1;
12687 /* Pass synthetic vectors in memory. */
12688 if (TREE_CODE (type) == VECTOR_TYPE
12689 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12691 static bool warned_for_pass_big_vectors = false;
12692 if (TARGET_DEBUG_ARG)
12693 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12694 if (!warned_for_pass_big_vectors)
12696 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12697 "non-standard ABI extension with no compatibility guarantee");
12698 warned_for_pass_big_vectors = true;
12700 return 1;
12703 return 0;
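/* Examples (illustrative): under ABI_V4 any aggregate, and IEEE 128-bit
   long double when TARGET_IEEEQUAD is set, are passed by reference; a
   16-byte AltiVec vector is passed by reference on 32-bit targets
   without the AltiVec ABI; and a synthetic vector such as
   int __attribute__ ((vector_size (32))) is passed by reference under
   every ABI, with the -Wpsabi warning above issued once.  */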
12706 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12707 already processed. Return true if the parameter must be passed
12708 (fully or partially) on the stack. */
12710 static bool
12711 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12713 machine_mode mode;
12714 int unsignedp;
12715 rtx entry_parm;
12717 /* Catch errors. */
12718 if (type == NULL || type == error_mark_node)
12719 return true;
12721 /* Handle types with no storage requirement. */
12722 if (TYPE_MODE (type) == VOIDmode)
12723 return false;
12725 /* Handle complex types: a complex value is passed as two scalar parts, so check the component type for each part. */
12726 if (TREE_CODE (type) == COMPLEX_TYPE)
12727 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12728 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
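  /* (Editorial note: the two identical operands above are intentional,
     not a typo; each recursive call also advances ARGS_SO_FAR past one
     component of the complex value.)  */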
12730 /* Handle transparent aggregates. */
12731 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12732 && TYPE_TRANSPARENT_AGGR (type))
12733 type = TREE_TYPE (first_field (type));
12735 /* See if this arg was passed by invisible reference. */
12736 if (pass_by_reference (get_cumulative_args (args_so_far),
12737 TYPE_MODE (type), type, true))
12738 type = build_pointer_type (type);
12740 /* Find mode as it is passed by the ABI. */
12741 unsignedp = TYPE_UNSIGNED (type);
12742 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12744 /* If we must pass in stack, we need a stack. */
12745 if (rs6000_must_pass_in_stack (mode, type))
12746 return true;
12748 /* If there is no incoming register, we need a stack. */
12749 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12750 if (entry_parm == NULL)
12751 return true;
12753 /* Likewise if we need to pass both in registers and on the stack. */
12754 if (GET_CODE (entry_parm) == PARALLEL
12755 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12756 return true;
12758 /* Also true if we're partially in registers and partially not. */
12759 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12760 return true;
12762 /* Update info on where next arg arrives in registers. */
12763 rs6000_function_arg_advance (args_so_far, mode, type, true);
12764 return false;
12767 /* Return true if FUN has no prototype, has a variable argument
12768 list, or passes any parameter in memory. */
12770 static bool
12771 rs6000_function_parms_need_stack (tree fun, bool incoming)
12773 tree fntype, result;
12774 CUMULATIVE_ARGS args_so_far_v;
12775 cumulative_args_t args_so_far;
12777 if (!fun)
12778 /* Must be a libcall, all of which only use reg parms. */
12779 return false;
12781 fntype = fun;
12782 if (!TYPE_P (fun))
12783 fntype = TREE_TYPE (fun);
12785 /* Varargs functions need the parameter save area. */
12786 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12787 return true;
12789 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12790 args_so_far = pack_cumulative_args (&args_so_far_v);
12792 /* When incoming, we will have been passed the function decl.
12793 It is necessary to use the decl to handle K&R style functions,
12794 where TYPE_ARG_TYPES may not be available. */
12795 if (incoming)
12797 gcc_assert (DECL_P (fun));
12798 result = DECL_RESULT (fun);
12800 else
12801 result = TREE_TYPE (fntype);
12803 if (result && aggregate_value_p (result, fntype))
12805 if (!TYPE_P (result))
12806 result = TREE_TYPE (result);
12807 result = build_pointer_type (result);
12808 rs6000_parm_needs_stack (args_so_far, result);
12811 if (incoming)
12813 tree parm;
12815 for (parm = DECL_ARGUMENTS (fun);
12816 parm && parm != void_list_node;
12817 parm = TREE_CHAIN (parm))
12818 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12819 return true;
12821 else
12823 function_args_iterator args_iter;
12824 tree arg_type;
12826 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12827 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12828 return true;
12831 return false;
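/* Editorial example of the distinction made above (declarations
   hypothetical):

     int f (int a, double b);    // prototyped, all parms in registers
                                 //   -> returns false under ELFv2
     int g (const char *s, ...); // stdarg
                                 //   -> returns true (save area needed)
*/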
12834 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12835 usually a constant depending on the ABI. However, in the ELFv2 ABI
12836 the register parameter area is optional when calling a function that
12837 has a prototype in scope, has no variable argument list, and passes
12838 all parameters in registers. */
12840 int
12841 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12843 int reg_parm_stack_space;
12845 switch (DEFAULT_ABI)
12847 default:
12848 reg_parm_stack_space = 0;
12849 break;
12851 case ABI_AIX:
12852 case ABI_DARWIN:
12853 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12854 break;
12856 case ABI_ELFv2:
12857 /* ??? Recomputing this every time is a bit expensive. Is there
12858 a place to cache this information? */
12859 if (rs6000_function_parms_need_stack (fun, incoming))
12860 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12861 else
12862 reg_parm_stack_space = 0;
12863 break;
12866 return reg_parm_stack_space;
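/* Resulting sizes (editorial note): AIX and Darwin always reserve the
   full parameter save area, 8 GPR slots * 8 bytes = 64 bytes for 64-bit
   (8 * 4 = 32 for 32-bit); ELFv2 reserves the same 64 bytes only when
   rs6000_function_parms_need_stack says it is actually required.  */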
12869 static void
12870 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12872 int i;
12873 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12875 if (nregs == 0)
12876 return;
12878 for (i = 0; i < nregs; i++)
12880 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12881 if (reload_completed)
12883 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12884 tem = NULL_RTX;
12885 else
12886 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12887 i * GET_MODE_SIZE (reg_mode));
12889 else
12890 tem = replace_equiv_address (tem, XEXP (tem, 0));
12892 gcc_assert (tem);
12894 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12898 /* Perform any actions needed for a function that is receiving a
12899 variable number of arguments.
12901 CUM is as above.
12903 MODE and TYPE are the mode and type of the current parameter.
12905 PRETEND_SIZE is a variable that should be set to the amount of stack
12906 that must be pushed by the prolog to pretend that our caller pushed it.
12909 Normally, this macro will push all remaining incoming registers on the
12910 stack and set PRETEND_SIZE to the length of the registers pushed. */
12912 static void
12913 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12914 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12915 int no_rtl)
12917 CUMULATIVE_ARGS next_cum;
12918 int reg_size = TARGET_32BIT ? 4 : 8;
12919 rtx save_area = NULL_RTX, mem;
12920 int first_reg_offset;
12921 alias_set_type set;
12923 /* Skip the last named argument. */
12924 next_cum = *get_cumulative_args (cum);
12925 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12927 if (DEFAULT_ABI == ABI_V4)
12929 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12931 if (! no_rtl)
12933 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12934 HOST_WIDE_INT offset = 0;
12936 /* Try to optimize the size of the varargs save area.
12937 The ABI requires that ap.reg_save_area is doubleword
12938 aligned, but we don't need to allocate space for all
12939 the bytes, only those to which we actually will save
12940 anything. */
12941 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12942 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12943 if (TARGET_HARD_FLOAT && TARGET_FPRS
12944 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12945 && cfun->va_list_fpr_size)
12947 if (gpr_reg_num)
12948 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12949 * UNITS_PER_FP_WORD;
12950 if (cfun->va_list_fpr_size
12951 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12952 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12953 else
12954 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12955 * UNITS_PER_FP_WORD;
12957 if (gpr_reg_num)
12959 offset = -((first_reg_offset * reg_size) & ~7);
12960 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12962 gpr_reg_num = cfun->va_list_gpr_size;
12963 if (reg_size == 4 && (first_reg_offset & 1))
12964 gpr_reg_num++;
12966 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12968 else if (fpr_size)
12969 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12970 * UNITS_PER_FP_WORD
12971 - (int) (GP_ARG_NUM_REG * reg_size);
12973 if (gpr_size + fpr_size)
12975 rtx reg_save_area
12976 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12977 gcc_assert (GET_CODE (reg_save_area) == MEM);
12978 reg_save_area = XEXP (reg_save_area, 0);
12979 if (GET_CODE (reg_save_area) == PLUS)
12981 gcc_assert (XEXP (reg_save_area, 0)
12982 == virtual_stack_vars_rtx);
12983 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12984 offset += INTVAL (XEXP (reg_save_area, 1));
12986 else
12987 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12990 cfun->machine->varargs_save_offset = offset;
12991 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12994 else
12996 first_reg_offset = next_cum.words;
12997 save_area = crtl->args.internal_arg_pointer;
12999 if (targetm.calls.must_pass_in_stack (mode, type))
13000 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13003 set = get_varargs_alias_set ();
13004 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13005 && cfun->va_list_gpr_size)
13007 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13009 if (va_list_gpr_counter_field)
13010 /* V4 va_list_gpr_size counts number of registers needed. */
13011 n_gpr = cfun->va_list_gpr_size;
13012 else
13013 /* char * va_list instead counts number of bytes needed. */
13014 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13016 if (nregs > n_gpr)
13017 nregs = n_gpr;
13019 mem = gen_rtx_MEM (BLKmode,
13020 plus_constant (Pmode, save_area,
13021 first_reg_offset * reg_size));
13022 MEM_NOTRAP_P (mem) = 1;
13023 set_mem_alias_set (mem, set);
13024 set_mem_align (mem, BITS_PER_WORD);
13026 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13027 nregs);
13030 /* Save FP registers if needed. */
13031 if (DEFAULT_ABI == ABI_V4
13032 && TARGET_HARD_FLOAT && TARGET_FPRS
13033 && ! no_rtl
13034 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13035 && cfun->va_list_fpr_size)
13037 int fregno = next_cum.fregno, nregs;
13038 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13039 rtx lab = gen_label_rtx ();
13040 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13041 * UNITS_PER_FP_WORD);
13043 emit_jump_insn
13044 (gen_rtx_SET (pc_rtx,
13045 gen_rtx_IF_THEN_ELSE (VOIDmode,
13046 gen_rtx_NE (VOIDmode, cr1,
13047 const0_rtx),
13048 gen_rtx_LABEL_REF (VOIDmode, lab),
13049 pc_rtx)));
13051 for (nregs = 0;
13052 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13053 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13055 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13056 ? DFmode : SFmode,
13057 plus_constant (Pmode, save_area, off));
13058 MEM_NOTRAP_P (mem) = 1;
13059 set_mem_alias_set (mem, set);
13060 set_mem_align (mem, GET_MODE_ALIGNMENT (
13061 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13062 ? DFmode : SFmode));
13063 emit_move_insn (mem, gen_rtx_REG (
13064 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13065 ? DFmode : SFmode, fregno));
13068 emit_label (lab);
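/* Editorial sketch of the V.4 register save area laid out above
   (offsets assume 32-bit, so reg_size == 4 and UNITS_PER_FP_WORD == 8):

     reg_save_area:  +0   r3..r10   (8 GPRs * 4 bytes = 32 bytes)
                     +32  f1..f8    (8 FPRs * 8 bytes = 64 bytes)

   Only the registers va_arg can still need are actually spilled.  */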
13072 /* Create the va_list data type. */
13074 static tree
13075 rs6000_build_builtin_va_list (void)
13077 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13079 /* For AIX, prefer 'char *' because that's what the system
13080 header files like. */
13081 if (DEFAULT_ABI != ABI_V4)
13082 return build_pointer_type (char_type_node);
13084 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13085 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13086 get_identifier ("__va_list_tag"), record);
13088 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13089 unsigned_char_type_node);
13090 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13091 unsigned_char_type_node);
13092 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13093 every user file. */
13094 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13095 get_identifier ("reserved"), short_unsigned_type_node);
13096 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13097 get_identifier ("overflow_arg_area"),
13098 ptr_type_node);
13099 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13100 get_identifier ("reg_save_area"),
13101 ptr_type_node);
13103 va_list_gpr_counter_field = f_gpr;
13104 va_list_fpr_counter_field = f_fpr;
13106 DECL_FIELD_CONTEXT (f_gpr) = record;
13107 DECL_FIELD_CONTEXT (f_fpr) = record;
13108 DECL_FIELD_CONTEXT (f_res) = record;
13109 DECL_FIELD_CONTEXT (f_ovf) = record;
13110 DECL_FIELD_CONTEXT (f_sav) = record;
13112 TYPE_STUB_DECL (record) = type_decl;
13113 TYPE_NAME (record) = type_decl;
13114 TYPE_FIELDS (record) = f_gpr;
13115 DECL_CHAIN (f_gpr) = f_fpr;
13116 DECL_CHAIN (f_fpr) = f_res;
13117 DECL_CHAIN (f_res) = f_ovf;
13118 DECL_CHAIN (f_ovf) = f_sav;
13120 layout_type (record);
13122 /* The correct type is an array type of one element. */
13123 return build_array_type (record, build_index_type (size_zero_node));
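/* Editorial sketch: the record built above corresponds to the familiar
   SysV declaration (field names match the FIELD_DECLs created above):

     typedef struct __va_list_tag {
       unsigned char gpr;         // index of next saved GPR (0..8)
       unsigned char fpr;         // index of next saved FPR (0..8)
       unsigned short reserved;   // named padding, see above
       void *overflow_arg_area;   // next argument passed on the stack
       void *reg_save_area;       // base of the register save block
     } __gnuc_va_list[1];
*/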
13126 /* Implement va_start. */
13128 static void
13129 rs6000_va_start (tree valist, rtx nextarg)
13131 HOST_WIDE_INT words, n_gpr, n_fpr;
13132 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13133 tree gpr, fpr, ovf, sav, t;
13135 /* Only SVR4 needs something special. */
13136 if (DEFAULT_ABI != ABI_V4)
13138 std_expand_builtin_va_start (valist, nextarg);
13139 return;
13142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13143 f_fpr = DECL_CHAIN (f_gpr);
13144 f_res = DECL_CHAIN (f_fpr);
13145 f_ovf = DECL_CHAIN (f_res);
13146 f_sav = DECL_CHAIN (f_ovf);
13148 valist = build_simple_mem_ref (valist);
13149 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13150 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13151 f_fpr, NULL_TREE);
13152 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13153 f_ovf, NULL_TREE);
13154 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13155 f_sav, NULL_TREE);
13157 /* Count number of gp and fp argument registers used. */
13158 words = crtl->args.info.words;
13159 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13160 GP_ARG_NUM_REG);
13161 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13162 FP_ARG_NUM_REG);
13164 if (TARGET_DEBUG_ARG)
13165 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13166 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13167 words, n_gpr, n_fpr);
13169 if (cfun->va_list_gpr_size)
13171 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13172 build_int_cst (NULL_TREE, n_gpr));
13173 TREE_SIDE_EFFECTS (t) = 1;
13174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13177 if (cfun->va_list_fpr_size)
13179 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13180 build_int_cst (NULL_TREE, n_fpr));
13181 TREE_SIDE_EFFECTS (t) = 1;
13182 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13184 #ifdef HAVE_AS_GNU_ATTRIBUTE
13185 if (call_ABI_of_interest (cfun->decl))
13186 rs6000_passes_float = true;
13187 #endif
13190 /* Find the overflow area. */
13191 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13192 if (words != 0)
13193 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13194 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13195 TREE_SIDE_EFFECTS (t) = 1;
13196 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13198 /* If there were no va_arg invocations, don't set up the register
13199 save area. */
13200 if (!cfun->va_list_gpr_size
13201 && !cfun->va_list_fpr_size
13202 && n_gpr < GP_ARG_NUM_REG
13203 && n_fpr < FP_ARG_V4_MAX_REG)
13204 return;
13206 /* Find the register save area. */
13207 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13208 if (cfun->machine->varargs_save_offset)
13209 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13210 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13211 TREE_SIDE_EFFECTS (t) = 1;
13212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
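/* Editorial example: for a V.4 function such as

     int f (int a, ...);

   va_start leaves gpr = 1 (r3 holds A), fpr = 0, overflow_arg_area
   pointing just past any named stack-passed words, and reg_save_area
   at the block spilled by setup_incoming_varargs above.  */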
13215 /* Implement va_arg. */
13217 static tree
13218 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13219 gimple_seq *post_p)
13221 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13222 tree gpr, fpr, ovf, sav, reg, t, u;
13223 int size, rsize, n_reg, sav_ofs, sav_scale;
13224 tree lab_false, lab_over, addr;
13225 int align;
13226 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13227 int regalign = 0;
13228 gimple *stmt;
13230 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13232 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13233 return build_va_arg_indirect_ref (t);
13236 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13237 earlier version of gcc, with the property that it always applied alignment
13238 adjustments to the va-args (even for zero-sized types). The cheapest way
13239 to deal with this is to replicate the effect of the part of
13240 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13241 of relevance.
13242 We don't need to check for pass-by-reference because of the test above.
13243 We can return a simplified answer, since we know there's no offset to add. */
13245 if (((TARGET_MACHO
13246 && rs6000_darwin64_abi)
13247 || DEFAULT_ABI == ABI_ELFv2
13248 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13249 && integer_zerop (TYPE_SIZE (type)))
13251 unsigned HOST_WIDE_INT align, boundary;
13252 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13253 align = PARM_BOUNDARY / BITS_PER_UNIT;
13254 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13255 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13256 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13257 boundary /= BITS_PER_UNIT;
13258 if (boundary > align)
13260 tree t;
13261 /* This updates arg ptr by the amount that would be necessary
13262 to align the zero-sized (but not zero-alignment) item. */
13263 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13264 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13265 gimplify_and_add (t, pre_p);
13267 t = fold_convert (sizetype, valist_tmp);
13268 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13269 fold_convert (TREE_TYPE (valist),
13270 fold_build2 (BIT_AND_EXPR, sizetype, t,
13271 size_int (-boundary))));
13272 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13273 gimplify_and_add (t, pre_p);
13275 /* Since it is zero-sized there's no increment for the item itself. */
13276 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13277 return build_va_arg_indirect_ref (valist_tmp);
13280 if (DEFAULT_ABI != ABI_V4)
13282 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13284 tree elem_type = TREE_TYPE (type);
13285 machine_mode elem_mode = TYPE_MODE (elem_type);
13286 int elem_size = GET_MODE_SIZE (elem_mode);
13288 if (elem_size < UNITS_PER_WORD)
13290 tree real_part, imag_part;
13291 gimple_seq post = NULL;
13293 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13294 &post);
13295 /* Copy the value into a temporary, lest the formal temporary
13296 be reused out from under us. */
13297 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13298 gimple_seq_add_seq (pre_p, post);
13300 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13301 post_p);
13303 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13307 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13310 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13311 f_fpr = DECL_CHAIN (f_gpr);
13312 f_res = DECL_CHAIN (f_fpr);
13313 f_ovf = DECL_CHAIN (f_res);
13314 f_sav = DECL_CHAIN (f_ovf);
13316 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13317 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13318 f_fpr, NULL_TREE);
13319 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13320 f_ovf, NULL_TREE);
13321 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13322 f_sav, NULL_TREE);
13324 size = int_size_in_bytes (type);
13325 rsize = (size + 3) / 4;
13326 align = 1;
13328 machine_mode mode = TYPE_MODE (type);
13329 if (abi_v4_pass_in_fpr (mode))
13331 /* FP args go in FP registers, if present. */
13332 reg = fpr;
13333 n_reg = (size + 7) / 8;
13334 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13335 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13336 if (mode != SFmode && mode != SDmode)
13337 align = 8;
13339 else
13341 /* Otherwise into GP registers. */
13342 reg = gpr;
13343 n_reg = rsize;
13344 sav_ofs = 0;
13345 sav_scale = 4;
13346 if (n_reg == 2)
13347 align = 8;
13350 /* Pull the value out of the saved registers.... */
13352 lab_over = NULL;
13353 addr = create_tmp_var (ptr_type_node, "addr");
13355 /* AltiVec vectors never go in registers when -mabi=altivec. */
13356 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13357 align = 16;
13358 else
13360 lab_false = create_artificial_label (input_location);
13361 lab_over = create_artificial_label (input_location);
13363 /* Long long and SPE vectors are aligned in the registers.
13364 As are any other 2-gpr items such as complex int, due to a
13365 historical mistake. */
13366 u = reg;
13367 if (n_reg == 2 && reg == gpr)
13369 regalign = 1;
13370 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13371 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13372 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13373 unshare_expr (reg), u);
13375 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13376 reg number is 0 for f1, so we want to make it odd. */
13377 else if (reg == fpr && mode == TDmode)
13379 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13380 build_int_cst (TREE_TYPE (reg), 1));
13381 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13384 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13385 t = build2 (GE_EXPR, boolean_type_node, u, t);
13386 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13387 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13388 gimplify_and_add (t, pre_p);
13390 t = sav;
13391 if (sav_ofs)
13392 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13394 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13395 build_int_cst (TREE_TYPE (reg), n_reg));
13396 u = fold_convert (sizetype, u);
13397 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13398 t = fold_build_pointer_plus (t, u);
13400 /* _Decimal32 varargs are located in the second word of the 64-bit
13401 FP register for 32-bit binaries. */
13402 if (TARGET_32BIT
13403 && TARGET_HARD_FLOAT && TARGET_FPRS
13404 && mode == SDmode)
13405 t = fold_build_pointer_plus_hwi (t, size);
13407 gimplify_assign (addr, t, pre_p);
13409 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13411 stmt = gimple_build_label (lab_false);
13412 gimple_seq_add_stmt (pre_p, stmt);
13414 if ((n_reg == 2 && !regalign) || n_reg > 2)
13416 /* Ensure that we don't find any more args in regs.
13417 Alignment has taken care of the special cases. */
13418 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13422 /* ... otherwise out of the overflow area. */
13424 /* Care for on-stack alignment if needed. */
13425 t = ovf;
13426 if (align != 1)
13428 t = fold_build_pointer_plus_hwi (t, align - 1);
13429 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13430 build_int_cst (TREE_TYPE (t), -align));
13432 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13434 gimplify_assign (unshare_expr (addr), t, pre_p);
13436 t = fold_build_pointer_plus_hwi (t, size);
13437 gimplify_assign (unshare_expr (ovf), t, pre_p);
13439 if (lab_over)
13441 stmt = gimple_build_label (lab_over);
13442 gimple_seq_add_stmt (pre_p, stmt);
13445 if (STRICT_ALIGNMENT
13446 && (TYPE_ALIGN (type)
13447 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13449 /* The value (of type complex double, for example) may not be
13450 aligned in memory in the saved registers, so copy via a
13451 temporary. (This is the same code as used for SPARC.) */
13452 tree tmp = create_tmp_var (type, "va_arg_tmp");
13453 tree dest_addr = build_fold_addr_expr (tmp);
13455 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13456 3, dest_addr, addr, size_int (rsize * 4));
13458 gimplify_and_add (copy, pre_p);
13459 addr = dest_addr;
13462 addr = fold_convert (ptrtype, addr);
13463 return build_va_arg_indirect_ref (addr);
13466 /* Builtins. */
13468 static void
13469 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13471 tree t;
13472 unsigned classify = rs6000_builtin_info[(int)code].attr;
13473 const char *attr_string = "";
13475 gcc_assert (name != NULL);
13476 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13478 if (rs6000_builtin_decls[(int)code])
13479 fatal_error (input_location,
13480 "internal error: builtin function %s already processed", name);
13482 rs6000_builtin_decls[(int)code] = t =
13483 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13485 /* Set any special attributes. */
13486 if ((classify & RS6000_BTC_CONST) != 0)
13488 /* const function, function only depends on the inputs. */
13489 TREE_READONLY (t) = 1;
13490 TREE_NOTHROW (t) = 1;
13491 attr_string = ", const";
13493 else if ((classify & RS6000_BTC_PURE) != 0)
13495 /* pure function, function can read global memory, but does not set any
13496 external state. */
13497 DECL_PURE_P (t) = 1;
13498 TREE_NOTHROW (t) = 1;
13499 attr_string = ", pure";
13501 else if ((classify & RS6000_BTC_FP) != 0)
13503 /* Function is a math function. If rounding mode is on, then treat the
13504 function as not reading global memory, but it can have arbitrary side
13505 effects. If it is off, then assume the function is a const function.
13506 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13507 builtin-attribute.def that is used for the math functions. */
13508 TREE_NOTHROW (t) = 1;
13509 if (flag_rounding_math)
13511 DECL_PURE_P (t) = 1;
13512 DECL_IS_NOVOPS (t) = 1;
13513 attr_string = ", fp, pure";
13515 else
13517 TREE_READONLY (t) = 1;
13518 attr_string = ", fp, const";
13521 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13522 gcc_unreachable ();
13524 if (TARGET_DEBUG_BUILTIN)
13525 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13526 (int)code, name, attr_string);
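/* Hypothetical usage sketch (the names here are illustrative only; the
   real registrations are generated from rs6000-builtin.def below):

     def_builtin ("__builtin_altivec_vaddubm",
		  v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);
*/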
13529 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13531 #undef RS6000_BUILTIN_0
13532 #undef RS6000_BUILTIN_1
13533 #undef RS6000_BUILTIN_2
13534 #undef RS6000_BUILTIN_3
13535 #undef RS6000_BUILTIN_A
13536 #undef RS6000_BUILTIN_D
13537 #undef RS6000_BUILTIN_E
13538 #undef RS6000_BUILTIN_H
13539 #undef RS6000_BUILTIN_P
13540 #undef RS6000_BUILTIN_Q
13541 #undef RS6000_BUILTIN_S
13542 #undef RS6000_BUILTIN_X
13544 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13545 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13546 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13547 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13548 { MASK, ICODE, NAME, ENUM },
13550 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13551 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13552 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13553 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13554 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13555 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13556 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13557 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13559 static const struct builtin_description bdesc_3arg[] =
13561 #include "rs6000-builtin.def"
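/* Editorial note on the pattern used for all the bdesc_* tables in this
   file: rs6000-builtin.def is re-included once per table, with every
   RS6000_BUILTIN_* macro redefined to expand to nothing except the one
   class being collected, which expands to a { MASK, ICODE, NAME, ENUM }
   initializer.  Each re-inclusion therefore yields an array holding
   exactly the builtins of one class.  */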
13564 /* DST operations: void foo (void *, const int, const char). */
13566 #undef RS6000_BUILTIN_0
13567 #undef RS6000_BUILTIN_1
13568 #undef RS6000_BUILTIN_2
13569 #undef RS6000_BUILTIN_3
13570 #undef RS6000_BUILTIN_A
13571 #undef RS6000_BUILTIN_D
13572 #undef RS6000_BUILTIN_E
13573 #undef RS6000_BUILTIN_H
13574 #undef RS6000_BUILTIN_P
13575 #undef RS6000_BUILTIN_Q
13576 #undef RS6000_BUILTIN_S
13577 #undef RS6000_BUILTIN_X
13579 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13580 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13581 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13582 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13583 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13584 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13585 { MASK, ICODE, NAME, ENUM },
13587 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13588 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13589 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13590 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13591 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13592 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13594 static const struct builtin_description bdesc_dst[] =
13596 #include "rs6000-builtin.def"
13599 /* Simple binary operations: VECc = foo (VECa, VECb). */
13601 #undef RS6000_BUILTIN_0
13602 #undef RS6000_BUILTIN_1
13603 #undef RS6000_BUILTIN_2
13604 #undef RS6000_BUILTIN_3
13605 #undef RS6000_BUILTIN_A
13606 #undef RS6000_BUILTIN_D
13607 #undef RS6000_BUILTIN_E
13608 #undef RS6000_BUILTIN_H
13609 #undef RS6000_BUILTIN_P
13610 #undef RS6000_BUILTIN_Q
13611 #undef RS6000_BUILTIN_S
13612 #undef RS6000_BUILTIN_X
13614 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13615 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13616 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13617 { MASK, ICODE, NAME, ENUM },
13619 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13620 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13621 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13622 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13623 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13624 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13625 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13626 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13627 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13629 static const struct builtin_description bdesc_2arg[] =
13631 #include "rs6000-builtin.def"
13634 #undef RS6000_BUILTIN_0
13635 #undef RS6000_BUILTIN_1
13636 #undef RS6000_BUILTIN_2
13637 #undef RS6000_BUILTIN_3
13638 #undef RS6000_BUILTIN_A
13639 #undef RS6000_BUILTIN_D
13640 #undef RS6000_BUILTIN_E
13641 #undef RS6000_BUILTIN_H
13642 #undef RS6000_BUILTIN_P
13643 #undef RS6000_BUILTIN_Q
13644 #undef RS6000_BUILTIN_S
13645 #undef RS6000_BUILTIN_X
13647 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13648 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13649 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13650 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13651 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13652 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13653 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13654 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13655 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13656 { MASK, ICODE, NAME, ENUM },
13658 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13659 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13660 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13662 /* AltiVec predicates. */
13664 static const struct builtin_description bdesc_altivec_preds[] =
13666 #include "rs6000-builtin.def"
13669 /* SPE predicates. */
13670 #undef RS6000_BUILTIN_0
13671 #undef RS6000_BUILTIN_1
13672 #undef RS6000_BUILTIN_2
13673 #undef RS6000_BUILTIN_3
13674 #undef RS6000_BUILTIN_A
13675 #undef RS6000_BUILTIN_D
13676 #undef RS6000_BUILTIN_E
13677 #undef RS6000_BUILTIN_H
13678 #undef RS6000_BUILTIN_P
13679 #undef RS6000_BUILTIN_Q
13680 #undef RS6000_BUILTIN_S
13681 #undef RS6000_BUILTIN_X
13683 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13684 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13685 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13686 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13687 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13688 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13689 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13690 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13691 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13692 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13693 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13694 { MASK, ICODE, NAME, ENUM },
13696 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13698 static const struct builtin_description bdesc_spe_predicates[] =
13700 #include "rs6000-builtin.def"
13703 /* SPE evsel predicates. */
13704 #undef RS6000_BUILTIN_0
13705 #undef RS6000_BUILTIN_1
13706 #undef RS6000_BUILTIN_2
13707 #undef RS6000_BUILTIN_3
13708 #undef RS6000_BUILTIN_A
13709 #undef RS6000_BUILTIN_D
13710 #undef RS6000_BUILTIN_E
13711 #undef RS6000_BUILTIN_H
13712 #undef RS6000_BUILTIN_P
13713 #undef RS6000_BUILTIN_Q
13714 #undef RS6000_BUILTIN_S
13715 #undef RS6000_BUILTIN_X
13717 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13718 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13719 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13720 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13723 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13724 { MASK, ICODE, NAME, ENUM },
13726 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13727 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13728 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13729 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13730 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13732 static const struct builtin_description bdesc_spe_evsel[] =
13734 #include "rs6000-builtin.def"
13737 /* PAIRED predicates. */
13738 #undef RS6000_BUILTIN_0
13739 #undef RS6000_BUILTIN_1
13740 #undef RS6000_BUILTIN_2
13741 #undef RS6000_BUILTIN_3
13742 #undef RS6000_BUILTIN_A
13743 #undef RS6000_BUILTIN_D
13744 #undef RS6000_BUILTIN_E
13745 #undef RS6000_BUILTIN_H
13746 #undef RS6000_BUILTIN_P
13747 #undef RS6000_BUILTIN_Q
13748 #undef RS6000_BUILTIN_S
13749 #undef RS6000_BUILTIN_X
13751 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13752 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13753 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13754 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13755 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13756 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13757 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13758 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13759 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13760 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13761 { MASK, ICODE, NAME, ENUM },
13763 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13764 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13766 static const struct builtin_description bdesc_paired_preds[] =
13768 #include "rs6000-builtin.def"
13771 /* ABS* operations. */
13773 #undef RS6000_BUILTIN_0
13774 #undef RS6000_BUILTIN_1
13775 #undef RS6000_BUILTIN_2
13776 #undef RS6000_BUILTIN_3
13777 #undef RS6000_BUILTIN_A
13778 #undef RS6000_BUILTIN_D
13779 #undef RS6000_BUILTIN_E
13780 #undef RS6000_BUILTIN_H
13781 #undef RS6000_BUILTIN_P
13782 #undef RS6000_BUILTIN_Q
13783 #undef RS6000_BUILTIN_S
13784 #undef RS6000_BUILTIN_X
13786 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13787 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13788 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13789 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13790 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13791 { MASK, ICODE, NAME, ENUM },
13793 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13794 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13795 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13796 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13797 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13798 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13799 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13801 static const struct builtin_description bdesc_abs[] =
13803 #include "rs6000-builtin.def"
13806 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13807 foo (VECa). */
13809 #undef RS6000_BUILTIN_0
13810 #undef RS6000_BUILTIN_1
13811 #undef RS6000_BUILTIN_2
13812 #undef RS6000_BUILTIN_3
13813 #undef RS6000_BUILTIN_A
13814 #undef RS6000_BUILTIN_D
13815 #undef RS6000_BUILTIN_E
13816 #undef RS6000_BUILTIN_H
13817 #undef RS6000_BUILTIN_P
13818 #undef RS6000_BUILTIN_Q
13819 #undef RS6000_BUILTIN_S
13820 #undef RS6000_BUILTIN_X
13822 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13823 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13824 { MASK, ICODE, NAME, ENUM },
13826 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13827 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13828 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13829 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13830 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13831 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13832 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13833 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13834 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13835 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13837 static const struct builtin_description bdesc_1arg[] =
13839 #include "rs6000-builtin.def"
13842 /* Simple no-argument operations: result = __builtin_darn_32 () */
13844 #undef RS6000_BUILTIN_0
13845 #undef RS6000_BUILTIN_1
13846 #undef RS6000_BUILTIN_2
13847 #undef RS6000_BUILTIN_3
13848 #undef RS6000_BUILTIN_A
13849 #undef RS6000_BUILTIN_D
13850 #undef RS6000_BUILTIN_E
13851 #undef RS6000_BUILTIN_H
13852 #undef RS6000_BUILTIN_P
13853 #undef RS6000_BUILTIN_Q
13854 #undef RS6000_BUILTIN_S
13855 #undef RS6000_BUILTIN_X
13857 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13858 { MASK, ICODE, NAME, ENUM },
13860 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13861 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13862 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13863 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13864 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13865 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13866 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13867 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13868 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13869 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13870 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13872 static const struct builtin_description bdesc_0arg[] =
13874 #include "rs6000-builtin.def"
13877 /* HTM builtins. */
13878 #undef RS6000_BUILTIN_0
13879 #undef RS6000_BUILTIN_1
13880 #undef RS6000_BUILTIN_2
13881 #undef RS6000_BUILTIN_3
13882 #undef RS6000_BUILTIN_A
13883 #undef RS6000_BUILTIN_D
13884 #undef RS6000_BUILTIN_E
13885 #undef RS6000_BUILTIN_H
13886 #undef RS6000_BUILTIN_P
13887 #undef RS6000_BUILTIN_Q
13888 #undef RS6000_BUILTIN_S
13889 #undef RS6000_BUILTIN_X
13891 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13892 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13893 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13894 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13895 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13896 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13897 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13898 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13899 { MASK, ICODE, NAME, ENUM },
13901 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13902 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13903 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13904 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13906 static const struct builtin_description bdesc_htm[] =
13908 #include "rs6000-builtin.def"
13911 #undef RS6000_BUILTIN_0
13912 #undef RS6000_BUILTIN_1
13913 #undef RS6000_BUILTIN_2
13914 #undef RS6000_BUILTIN_3
13915 #undef RS6000_BUILTIN_A
13916 #undef RS6000_BUILTIN_D
13917 #undef RS6000_BUILTIN_E
13918 #undef RS6000_BUILTIN_H
13919 #undef RS6000_BUILTIN_P
13920 #undef RS6000_BUILTIN_Q
13921 #undef RS6000_BUILTIN_S
13922 #undef RS6000_BUILTIN_X
13923 /* Return true if a builtin function is overloaded. */
13924 bool
13925 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13927 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13930 const char *
13931 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13933 return rs6000_builtin_info[(int)fncode].name;
13936 /* Expand an expression EXP that calls a builtin without arguments. */
13937 static rtx
13938 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13940 rtx pat;
13941 machine_mode tmode = insn_data[icode].operand[0].mode;
13943 if (icode == CODE_FOR_nothing)
13944 /* Builtin not supported on this processor. */
13945 return 0;
13947 if (target == 0
13948 || GET_MODE (target) != tmode
13949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13950 target = gen_reg_rtx (tmode);
13952 pat = GEN_FCN (icode) (target);
13953 if (! pat)
13954 return 0;
13955 emit_insn (pat);
13957 return target;
13961 static rtx
13962 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13964 rtx pat;
13965 tree arg0 = CALL_EXPR_ARG (exp, 0);
13966 tree arg1 = CALL_EXPR_ARG (exp, 1);
13967 rtx op0 = expand_normal (arg0);
13968 rtx op1 = expand_normal (arg1);
13969 machine_mode mode0 = insn_data[icode].operand[0].mode;
13970 machine_mode mode1 = insn_data[icode].operand[1].mode;
13972 if (icode == CODE_FOR_nothing)
13973 /* Builtin not supported on this processor. */
13974 return 0;
13976 /* If we got invalid arguments bail out before generating bad rtl. */
13977 if (arg0 == error_mark_node || arg1 == error_mark_node)
13978 return const0_rtx;
13980 if (GET_CODE (op0) != CONST_INT
13981 || INTVAL (op0) > 255
13982 || INTVAL (op0) < 0)
13984 error ("argument 1 must be an 8-bit field value");
13985 return const0_rtx;
13988 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (mode0, op0);
13991 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13992 op1 = copy_to_mode_reg (mode1, op1);
13994 pat = GEN_FCN (icode) (op0, op1);
13995 if (! pat)
13996 return const0_rtx;
13997 emit_insn (pat);
13999 return NULL_RTX;
14002 static rtx
14003 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14005 rtx pat;
14006 tree arg0 = CALL_EXPR_ARG (exp, 0);
14007 rtx op0 = expand_normal (arg0);
14008 machine_mode tmode = insn_data[icode].operand[0].mode;
14009 machine_mode mode0 = insn_data[icode].operand[1].mode;
14011 if (icode == CODE_FOR_nothing)
14012 /* Builtin not supported on this processor. */
14013 return 0;
14015 /* If we got invalid arguments bail out before generating bad rtl. */
14016 if (arg0 == error_mark_node)
14017 return const0_rtx;
14019 if (icode == CODE_FOR_altivec_vspltisb
14020 || icode == CODE_FOR_altivec_vspltish
14021 || icode == CODE_FOR_altivec_vspltisw
14022 || icode == CODE_FOR_spe_evsplatfi
14023 || icode == CODE_FOR_spe_evsplati)
14025 /* Only allow 5-bit *signed* literals. */
14026 if (GET_CODE (op0) != CONST_INT
14027 || INTVAL (op0) > 15
14028 || INTVAL (op0) < -16)
14030 error ("argument 1 must be a 5-bit signed literal");
14031 return const0_rtx;
14035 if (target == 0
14036 || GET_MODE (target) != tmode
14037 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14038 target = gen_reg_rtx (tmode);
14040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14041 op0 = copy_to_mode_reg (mode0, op0);
14043 pat = GEN_FCN (icode) (target, op0);
14044 if (! pat)
14045 return 0;
14046 emit_insn (pat);
14048 return target;
14051 static rtx
14052 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14054 rtx pat, scratch1, scratch2;
14055 tree arg0 = CALL_EXPR_ARG (exp, 0);
14056 rtx op0 = expand_normal (arg0);
14057 machine_mode tmode = insn_data[icode].operand[0].mode;
14058 machine_mode mode0 = insn_data[icode].operand[1].mode;
14060 /* If we have invalid arguments, bail out before generating bad rtl. */
14061 if (arg0 == error_mark_node)
14062 return const0_rtx;
14064 if (target == 0
14065 || GET_MODE (target) != tmode
14066 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14067 target = gen_reg_rtx (tmode);
14069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14070 op0 = copy_to_mode_reg (mode0, op0);
14072 scratch1 = gen_reg_rtx (mode0);
14073 scratch2 = gen_reg_rtx (mode0);
14075 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14076 if (! pat)
14077 return 0;
14078 emit_insn (pat);
14080 return target;
14083 static rtx
14084 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14086 rtx pat;
14087 tree arg0 = CALL_EXPR_ARG (exp, 0);
14088 tree arg1 = CALL_EXPR_ARG (exp, 1);
14089 rtx op0 = expand_normal (arg0);
14090 rtx op1 = expand_normal (arg1);
14091 machine_mode tmode = insn_data[icode].operand[0].mode;
14092 machine_mode mode0 = insn_data[icode].operand[1].mode;
14093 machine_mode mode1 = insn_data[icode].operand[2].mode;
14095 if (icode == CODE_FOR_nothing)
14096 /* Builtin not supported on this processor. */
14097 return 0;
14099 /* If we got invalid arguments bail out before generating bad rtl. */
14100 if (arg0 == error_mark_node || arg1 == error_mark_node)
14101 return const0_rtx;
14103 if (icode == CODE_FOR_altivec_vcfux
14104 || icode == CODE_FOR_altivec_vcfsx
14105 || icode == CODE_FOR_altivec_vctsxs
14106 || icode == CODE_FOR_altivec_vctuxs
14107 || icode == CODE_FOR_altivec_vspltb
14108 || icode == CODE_FOR_altivec_vsplth
14109 || icode == CODE_FOR_altivec_vspltw
14110 || icode == CODE_FOR_spe_evaddiw
14111 || icode == CODE_FOR_spe_evldd
14112 || icode == CODE_FOR_spe_evldh
14113 || icode == CODE_FOR_spe_evldw
14114 || icode == CODE_FOR_spe_evlhhesplat
14115 || icode == CODE_FOR_spe_evlhhossplat
14116 || icode == CODE_FOR_spe_evlhhousplat
14117 || icode == CODE_FOR_spe_evlwhe
14118 || icode == CODE_FOR_spe_evlwhos
14119 || icode == CODE_FOR_spe_evlwhou
14120 || icode == CODE_FOR_spe_evlwhsplat
14121 || icode == CODE_FOR_spe_evlwwsplat
14122 || icode == CODE_FOR_spe_evrlwi
14123 || icode == CODE_FOR_spe_evslwi
14124 || icode == CODE_FOR_spe_evsrwis
14125 || icode == CODE_FOR_spe_evsubifw
14126 || icode == CODE_FOR_spe_evsrwiu)
14128 /* Only allow 5-bit unsigned literals. */
14129 STRIP_NOPS (arg1);
14130 if (TREE_CODE (arg1) != INTEGER_CST
14131 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14133 error ("argument 2 must be a 5-bit unsigned literal");
14134 return const0_rtx;
14137 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14138 || icode == CODE_FOR_dfptstsfi_lt_dd
14139 || icode == CODE_FOR_dfptstsfi_gt_dd
14140 || icode == CODE_FOR_dfptstsfi_unordered_dd
14141 || icode == CODE_FOR_dfptstsfi_eq_td
14142 || icode == CODE_FOR_dfptstsfi_lt_td
14143 || icode == CODE_FOR_dfptstsfi_gt_td
14144 || icode == CODE_FOR_dfptstsfi_unordered_td)
14146 /* Only allow 6-bit unsigned literals. */
14147 STRIP_NOPS (arg0);
14148 if (TREE_CODE (arg0) != INTEGER_CST
14149 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14151 error ("argument 1 must be a 6-bit unsigned literal");
14152 return CONST0_RTX (tmode);
14155 else if (icode == CODE_FOR_xststdcdp
14156 || icode == CODE_FOR_xststdcsp
14157 || icode == CODE_FOR_xvtstdcdp
14158 || icode == CODE_FOR_xvtstdcsp)
14160 /* Only allow 7-bit unsigned literals. */
14161 STRIP_NOPS (arg1);
14162 if (TREE_CODE (arg1) != INTEGER_CST
14163 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14165 error ("argument 2 must be a 7-bit unsigned literal");
14166 return CONST0_RTX (tmode);
14170 if (target == 0
14171 || GET_MODE (target) != tmode
14172 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14173 target = gen_reg_rtx (tmode);
14175 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14176 op0 = copy_to_mode_reg (mode0, op0);
14177 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14178 op1 = copy_to_mode_reg (mode1, op1);
14180 pat = GEN_FCN (icode) (target, op0, op1);
14181 if (! pat)
14182 return 0;
14183 emit_insn (pat);
14185 return target;
14188 static rtx
14189 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14191 rtx pat, scratch;
14192 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14193 tree arg0 = CALL_EXPR_ARG (exp, 1);
14194 tree arg1 = CALL_EXPR_ARG (exp, 2);
14195 rtx op0 = expand_normal (arg0);
14196 rtx op1 = expand_normal (arg1);
14197 machine_mode tmode = SImode;
14198 machine_mode mode0 = insn_data[icode].operand[1].mode;
14199 machine_mode mode1 = insn_data[icode].operand[2].mode;
14200 int cr6_form_int;
14202 if (TREE_CODE (cr6_form) != INTEGER_CST)
14204 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14205 return const0_rtx;
14207 else
14208 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14210 gcc_assert (mode0 == mode1);
14212 /* If we have invalid arguments, bail out before generating bad rtl. */
14213 if (arg0 == error_mark_node || arg1 == error_mark_node)
14214 return const0_rtx;
14216 if (target == 0
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14221 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14222 op0 = copy_to_mode_reg (mode0, op0);
14223 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14224 op1 = copy_to_mode_reg (mode1, op1);
14226 /* Note that for many of the relevant operations (e.g. cmpne or
14227 cmpeq) with float or double operands, it makes more sense for the
14228 mode of the allocated scratch register to select a vector of
14229 integers. But the choice to copy the mode of operand 0 was made
14230 long ago and there are no plans to change it. */
14231 scratch = gen_reg_rtx (mode0);
14233 pat = GEN_FCN (icode) (scratch, op0, op1);
14234 if (! pat)
14235 return 0;
14236 emit_insn (pat);
14238 /* The vec_any* and vec_all* predicates use the same opcodes for two
14239 different operations, but the bits in CR6 will be different
14240 depending on what information we want. So we have to play tricks
14241 with CR6 to get the right bits out.
14243 If you think this is disgusting, look at the specs for the
14244 AltiVec predicates. */
14246 switch (cr6_form_int)
14248 case 0:
14249 emit_insn (gen_cr6_test_for_zero (target));
14250 break;
14251 case 1:
14252 emit_insn (gen_cr6_test_for_zero_reverse (target));
14253 break;
14254 case 2:
14255 emit_insn (gen_cr6_test_for_lt (target));
14256 break;
14257 case 3:
14258 emit_insn (gen_cr6_test_for_lt_reverse (target));
14259 break;
14260 default:
14261 error ("argument 1 of __builtin_altivec_predicate is out of range");
14262 break;
14265 return target;
14268 static rtx
14269 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14271 rtx pat, addr;
14272 tree arg0 = CALL_EXPR_ARG (exp, 0);
14273 tree arg1 = CALL_EXPR_ARG (exp, 1);
14274 machine_mode tmode = insn_data[icode].operand[0].mode;
14275 machine_mode mode0 = Pmode;
14276 machine_mode mode1 = Pmode;
14277 rtx op0 = expand_normal (arg0);
14278 rtx op1 = expand_normal (arg1);
14280 if (icode == CODE_FOR_nothing)
14281 /* Builtin not supported on this processor. */
14282 return 0;
14284 /* If we got invalid arguments bail out before generating bad rtl. */
14285 if (arg0 == error_mark_node || arg1 == error_mark_node)
14286 return const0_rtx;
14288 if (target == 0
14289 || GET_MODE (target) != tmode
14290 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14291 target = gen_reg_rtx (tmode);
14293 op1 = copy_to_mode_reg (mode1, op1);
14295 if (op0 == const0_rtx)
14297 addr = gen_rtx_MEM (tmode, op1);
14299 else
14301 op0 = copy_to_mode_reg (mode0, op0);
14302 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14305 pat = GEN_FCN (icode) (target, addr);
14307 if (! pat)
14308 return 0;
14309 emit_insn (pat);
14311 return target;
14314 /* Return a constant vector for use as a little-endian permute control vector
14315 to reverse the order of elements of the given vector mode. */
14316 static rtx
14317 swap_selector_for_mode (machine_mode mode)
14319 /* These are little endian vectors, so their elements are reversed
14320 from what you would normally expect for a permute control vector. */
14321 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14322 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14323 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14324 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14325 unsigned int *swaparray, i;
14326 rtx perm[16];
14328 switch (mode)
14330 case V2DFmode:
14331 case V2DImode:
14332 swaparray = swap2;
14333 break;
14334 case V4SFmode:
14335 case V4SImode:
14336 swaparray = swap4;
14337 break;
14338 case V8HImode:
14339 swaparray = swap8;
14340 break;
14341 case V16QImode:
14342 swaparray = swap16;
14343 break;
14344 default:
14345 gcc_unreachable ();
14348 for (i = 0; i < 16; ++i)
14349 perm[i] = GEN_INT (swaparray[i]);
14351 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
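/* Worked example (editorial): for V4SImode the selector built above is
   the byte vector {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, written
   in the reversed little-endian element order noted above.  Used as a
   vperm control with the same vector for both inputs, it reverses the
   order of the four 32-bit elements while leaving the bytes within
   each element in place.  */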
14354 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14355 with -maltivec=be specified. Issue the load followed by an element-
14356 reversing permute. */
14357 void
14358 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14360 rtx tmp = gen_reg_rtx (mode);
14361 rtx load = gen_rtx_SET (tmp, op1);
14362 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14363 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14364 rtx sel = swap_selector_for_mode (mode);
14365 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14367 gcc_assert (REG_P (op0));
14368 emit_insn (par);
14369 emit_insn (gen_rtx_SET (op0, vperm));
14372 /* Generate code for a "stvxl" built-in for a little endian target with
14373 -maltivec=be specified. Issue the store preceded by an element-reversing
14374 permute. */
14375 void
14376 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14378 rtx tmp = gen_reg_rtx (mode);
14379 rtx store = gen_rtx_SET (op0, tmp);
14380 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14381 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14382 rtx sel = swap_selector_for_mode (mode);
14383 rtx vperm;
14385 gcc_assert (REG_P (op1));
14386 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14387 emit_insn (gen_rtx_SET (tmp, vperm));
14388 emit_insn (par);
14391 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14392 specified. Issue the store preceded by an element-reversing permute. */
14393 void
14394 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14396 machine_mode inner_mode = GET_MODE_INNER (mode);
14397 rtx tmp = gen_reg_rtx (mode);
14398 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14399 rtx sel = swap_selector_for_mode (mode);
14400 rtx vperm;
14402 gcc_assert (REG_P (op1));
14403 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14404 emit_insn (gen_rtx_SET (tmp, vperm));
14405 emit_insn (gen_rtx_SET (op0, stvx));
14408 static rtx
14409 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14411 rtx pat, addr;
14412 tree arg0 = CALL_EXPR_ARG (exp, 0);
14413 tree arg1 = CALL_EXPR_ARG (exp, 1);
14414 machine_mode tmode = insn_data[icode].operand[0].mode;
14415 machine_mode mode0 = Pmode;
14416 machine_mode mode1 = Pmode;
14417 rtx op0 = expand_normal (arg0);
14418 rtx op1 = expand_normal (arg1);
14420 if (icode == CODE_FOR_nothing)
14421 /* Builtin not supported on this processor. */
14422 return 0;
14424 /* If we got invalid arguments bail out before generating bad rtl. */
14425 if (arg0 == error_mark_node || arg1 == error_mark_node)
14426 return const0_rtx;
14428 if (target == 0
14429 || GET_MODE (target) != tmode
14430 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14431 target = gen_reg_rtx (tmode);
14433 op1 = copy_to_mode_reg (mode1, op1);
14435 /* For LVX, express the RTL accurately by ANDing the address with -16.
14436 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14437 so the raw address is fine. */
14438 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14439 || icode == CODE_FOR_altivec_lvx_v2di_2op
14440 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14441 || icode == CODE_FOR_altivec_lvx_v4si_2op
14442 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14443 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14445 rtx rawaddr;
14446 if (op0 == const0_rtx)
14447 rawaddr = op1;
14448 else
14450 op0 = copy_to_mode_reg (mode0, op0);
14451 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14453 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14454 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14456 /* For -maltivec=be, emit the load and follow it up with a
14457 permute to swap the elements. */
14458 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14460 rtx temp = gen_reg_rtx (tmode);
14461 emit_insn (gen_rtx_SET (temp, addr));
14463 rtx sel = swap_selector_for_mode (tmode);
14464 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14465 UNSPEC_VPERM);
14466 emit_insn (gen_rtx_SET (target, vperm));
14468 else
14469 emit_insn (gen_rtx_SET (target, addr));
14471 else
14473 if (op0 == const0_rtx)
14474 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14475 else
14477 op0 = copy_to_mode_reg (mode0, op0);
14478 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14479 gen_rtx_PLUS (Pmode, op1, op0));
14482 pat = GEN_FCN (icode) (target, addr);
14483 if (! pat)
14484 return 0;
14485 emit_insn (pat);
14488 return target;
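/* The AND with -16 above encodes in the RTL what the lvx hardware
   does anyway: the low four bits of the effective address are
   ignored, so the access is always 16-byte aligned.  A host-side
   model of that address computation, assuming 64-bit addresses
   (illustrative only):  */

#include <stdint.h>

static uint64_t
lvx_effective_address_model (uint64_t base, uint64_t index)
{
  return (base + index) & ~(uint64_t) 15;	/* same as AND with -16 */
}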
14491 static rtx
14492 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14494 tree arg0 = CALL_EXPR_ARG (exp, 0);
14495 tree arg1 = CALL_EXPR_ARG (exp, 1);
14496 tree arg2 = CALL_EXPR_ARG (exp, 2);
14497 rtx op0 = expand_normal (arg0);
14498 rtx op1 = expand_normal (arg1);
14499 rtx op2 = expand_normal (arg2);
14500 rtx pat;
14501 machine_mode mode0 = insn_data[icode].operand[0].mode;
14502 machine_mode mode1 = insn_data[icode].operand[1].mode;
14503 machine_mode mode2 = insn_data[icode].operand[2].mode;
14505 /* Invalid arguments. Bail before doing anything stupid. */
14506 if (arg0 == error_mark_node
14507 || arg1 == error_mark_node
14508 || arg2 == error_mark_node)
14509 return const0_rtx;
14511 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14512 op0 = copy_to_mode_reg (mode2, op0);
14513 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14514 op1 = copy_to_mode_reg (mode0, op1);
14515 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14516 op2 = copy_to_mode_reg (mode1, op2);
14518 pat = GEN_FCN (icode) (op1, op2, op0);
14519 if (pat)
14520 emit_insn (pat);
14521 return NULL_RTX;
14524 static rtx
14525 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14527 tree arg0 = CALL_EXPR_ARG (exp, 0);
14528 tree arg1 = CALL_EXPR_ARG (exp, 1);
14529 tree arg2 = CALL_EXPR_ARG (exp, 2);
14530 rtx op0 = expand_normal (arg0);
14531 rtx op1 = expand_normal (arg1);
14532 rtx op2 = expand_normal (arg2);
14533 rtx pat, addr;
14534 machine_mode tmode = insn_data[icode].operand[0].mode;
14535 machine_mode mode1 = Pmode;
14536 machine_mode mode2 = Pmode;
14538 /* Invalid arguments. Bail before doing anything stupid. */
14539 if (arg0 == error_mark_node
14540 || arg1 == error_mark_node
14541 || arg2 == error_mark_node)
14542 return const0_rtx;
14544 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14545 op0 = copy_to_mode_reg (tmode, op0);
14547 op2 = copy_to_mode_reg (mode2, op2);
14549 if (op1 == const0_rtx)
14551 addr = gen_rtx_MEM (tmode, op2);
14553 else
14555 op1 = copy_to_mode_reg (mode1, op1);
14556 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14559 pat = GEN_FCN (icode) (addr, op0);
14560 if (pat)
14561 emit_insn (pat);
14562 return NULL_RTX;
14565 static rtx
14566 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14568 rtx pat;
14569 tree arg0 = CALL_EXPR_ARG (exp, 0);
14570 tree arg1 = CALL_EXPR_ARG (exp, 1);
14571 tree arg2 = CALL_EXPR_ARG (exp, 2);
14572 rtx op0 = expand_normal (arg0);
14573 rtx op1 = expand_normal (arg1);
14574 rtx op2 = expand_normal (arg2);
14575 machine_mode mode0 = insn_data[icode].operand[0].mode;
14576 machine_mode mode1 = insn_data[icode].operand[1].mode;
14577 machine_mode mode2 = insn_data[icode].operand[2].mode;
14579 if (icode == CODE_FOR_nothing)
14580 /* Builtin not supported on this processor. */
14581 return NULL_RTX;
14583 /* If we got invalid arguments bail out before generating bad rtl. */
14584 if (arg0 == error_mark_node
14585 || arg1 == error_mark_node
14586 || arg2 == error_mark_node)
14587 return NULL_RTX;
14589 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14590 op0 = copy_to_mode_reg (mode0, op0);
14591 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14592 op1 = copy_to_mode_reg (mode1, op1);
14593 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14594 op2 = copy_to_mode_reg (mode2, op2);
14596 pat = GEN_FCN (icode) (op0, op1, op2);
14597 if (pat)
14598 emit_insn (pat);
14600 return NULL_RTX;
14603 static rtx
14604 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14606 tree arg0 = CALL_EXPR_ARG (exp, 0);
14607 tree arg1 = CALL_EXPR_ARG (exp, 1);
14608 tree arg2 = CALL_EXPR_ARG (exp, 2);
14609 rtx op0 = expand_normal (arg0);
14610 rtx op1 = expand_normal (arg1);
14611 rtx op2 = expand_normal (arg2);
14612 rtx pat, addr, rawaddr;
14613 machine_mode tmode = insn_data[icode].operand[0].mode;
14614 machine_mode smode = insn_data[icode].operand[1].mode;
14615 machine_mode mode1 = Pmode;
14616 machine_mode mode2 = Pmode;
14618 /* Invalid arguments. Bail before doing anything stupid. */
14619 if (arg0 == error_mark_node
14620 || arg1 == error_mark_node
14621 || arg2 == error_mark_node)
14622 return const0_rtx;
14624 op2 = copy_to_mode_reg (mode2, op2);
14626 /* For STVX, express the RTL accurately by ANDing the address with -16.
14627 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14628 so the raw address is fine. */
14629 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14630 || icode == CODE_FOR_altivec_stvx_v2di_2op
14631 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14632 || icode == CODE_FOR_altivec_stvx_v4si_2op
14633 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14634 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14636 if (op1 == const0_rtx)
14637 rawaddr = op2;
14638 else
14640 op1 = copy_to_mode_reg (mode1, op1);
14641 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14644 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14645 addr = gen_rtx_MEM (tmode, addr);
14647 op0 = copy_to_mode_reg (tmode, op0);
14649 /* For -maltivec=be, emit a permute to swap the elements, followed
14650 by the store. */
14651 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14653 rtx temp = gen_reg_rtx (tmode);
14654 rtx sel = swap_selector_for_mode (tmode);
14655 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14656 UNSPEC_VPERM);
14657 emit_insn (gen_rtx_SET (temp, vperm));
14658 emit_insn (gen_rtx_SET (addr, temp));
14660 else
14661 emit_insn (gen_rtx_SET (addr, op0));
14663 else
14665 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14666 op0 = copy_to_mode_reg (smode, op0);
14668 if (op1 == const0_rtx)
14669 addr = gen_rtx_MEM (tmode, op2);
14670 else
14672 op1 = copy_to_mode_reg (mode1, op1);
14673 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14676 pat = GEN_FCN (icode) (addr, op0);
14677 if (pat)
14678 emit_insn (pat);
14681 return NULL_RTX;
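/* A hypothetical user-level view of the stvx path this expander
   lowers, assuming <altivec.h> and -maltivec; vec_st resolves to the
   ALTIVEC_BUILTIN_STVX* codes handled above.  */

#include <altivec.h>

void
store_quadword (vector int v, int *p)
{
  vec_st (v, 0, p);	/* stvx: stores v to (p + 0) & ~15 */
}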
14684 /* Return the appropriate SPR number associated with the given builtin. */
14685 static inline HOST_WIDE_INT
14686 htm_spr_num (enum rs6000_builtins code)
14688 if (code == HTM_BUILTIN_GET_TFHAR
14689 || code == HTM_BUILTIN_SET_TFHAR)
14690 return TFHAR_SPR;
14691 else if (code == HTM_BUILTIN_GET_TFIAR
14692 || code == HTM_BUILTIN_SET_TFIAR)
14693 return TFIAR_SPR;
14694 else if (code == HTM_BUILTIN_GET_TEXASR
14695 || code == HTM_BUILTIN_SET_TEXASR)
14696 return TEXASR_SPR;
14697 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14698 || code == HTM_BUILTIN_SET_TEXASRU);
14699 return TEXASRU_SPR;
14702 /* Return the appropriate SPR regno associated with the given builtin. */
14703 static inline HOST_WIDE_INT
14704 htm_spr_regno (enum rs6000_builtins code)
14706 if (code == HTM_BUILTIN_GET_TFHAR
14707 || code == HTM_BUILTIN_SET_TFHAR)
14708 return TFHAR_REGNO;
14709 else if (code == HTM_BUILTIN_GET_TFIAR
14710 || code == HTM_BUILTIN_SET_TFIAR)
14711 return TFIAR_REGNO;
14712 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14713 || code == HTM_BUILTIN_SET_TEXASR
14714 || code == HTM_BUILTIN_GET_TEXASRU
14715 || code == HTM_BUILTIN_SET_TEXASRU);
14716 return TEXASR_REGNO;
14719 /* Return the correct ICODE value depending on whether we are
14720 setting or reading the HTM SPRs. */
14721 static inline enum insn_code
14722 rs6000_htm_spr_icode (bool nonvoid)
14724 if (nonvoid)
14725 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14726 else
14727 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14730 /* Expand the HTM builtin in EXP and store the result in TARGET.
14731 Store true in *EXPANDEDP if we found a builtin to expand. */
14732 static rtx
14733 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14735 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14736 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14737 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14738 const struct builtin_description *d;
14739 size_t i;
14741 *expandedp = true;
14743 if (!TARGET_POWERPC64
14744 && (fcode == HTM_BUILTIN_TABORTDC
14745 || fcode == HTM_BUILTIN_TABORTDCI))
14747 size_t uns_fcode = (size_t)fcode;
14748 const char *name = rs6000_builtin_info[uns_fcode].name;
14749 error ("builtin %s is only valid in 64-bit mode", name);
14750 return const0_rtx;
14753 /* Expand the HTM builtins. */
14754 d = bdesc_htm;
14755 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14756 if (d->code == fcode)
14758 rtx op[MAX_HTM_OPERANDS], pat;
14759 int nopnds = 0;
14760 tree arg;
14761 call_expr_arg_iterator iter;
14762 unsigned attr = rs6000_builtin_info[fcode].attr;
14763 enum insn_code icode = d->icode;
14764 const struct insn_operand_data *insn_op;
14765 bool uses_spr = (attr & RS6000_BTC_SPR);
14766 rtx cr = NULL_RTX;
14768 if (uses_spr)
14769 icode = rs6000_htm_spr_icode (nonvoid);
14770 insn_op = &insn_data[icode].operand[0];
14772 if (nonvoid)
14774 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14775 if (!target
14776 || GET_MODE (target) != tmode
14777 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14778 target = gen_reg_rtx (tmode);
14779 if (uses_spr)
14780 op[nopnds++] = target;
14783 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14785 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14786 return const0_rtx;
14788 insn_op = &insn_data[icode].operand[nopnds];
14790 op[nopnds] = expand_normal (arg);
14792 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14794 if (!strcmp (insn_op->constraint, "n"))
14796 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14797 if (!CONST_INT_P (op[nopnds]))
14798 error ("argument %d must be an unsigned literal", arg_num);
14799 else
14800 error ("argument %d is an unsigned literal that is "
14801 "out of range", arg_num);
14802 return const0_rtx;
14804 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14807 nopnds++;
14810 /* Handle the builtins for extended mnemonics. These accept
14811 no arguments, but map to builtins that take arguments. */
14812 switch (fcode)
14814 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14815 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14816 op[nopnds++] = GEN_INT (1);
14817 if (flag_checking)
14818 attr |= RS6000_BTC_UNARY;
14819 break;
14820 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14821 op[nopnds++] = GEN_INT (0);
14822 if (flag_checking)
14823 attr |= RS6000_BTC_UNARY;
14824 break;
14825 default:
14826 break;
14829 /* If this builtin accesses SPRs, then pass in the appropriate
14830 SPR number and SPR regno as the last two operands. */
14831 if (uses_spr)
14833 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14834 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14835 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14837 /* If this builtin accesses a CR, then pass in a scratch
14838 CR as the last operand. */
14839 else if (attr & RS6000_BTC_CR)
14840 { cr = gen_reg_rtx (CCmode);
14841 op[nopnds++] = cr;
14844 if (flag_checking)
14846 int expected_nopnds = 0;
14847 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14848 expected_nopnds = 1;
14849 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14850 expected_nopnds = 2;
14851 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14852 expected_nopnds = 3;
14853 if (!(attr & RS6000_BTC_VOID))
14854 expected_nopnds += 1;
14855 if (uses_spr)
14856 expected_nopnds += 2;
14858 gcc_assert (nopnds == expected_nopnds
14859 && nopnds <= MAX_HTM_OPERANDS);
14862 switch (nopnds)
14864 case 1:
14865 pat = GEN_FCN (icode) (op[0]);
14866 break;
14867 case 2:
14868 pat = GEN_FCN (icode) (op[0], op[1]);
14869 break;
14870 case 3:
14871 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14872 break;
14873 case 4:
14874 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14875 break;
14876 default:
14877 gcc_unreachable ();
14879 if (!pat)
14880 return NULL_RTX;
14881 emit_insn (pat);
14883 if (attr & RS6000_BTC_CR)
14885 if (fcode == HTM_BUILTIN_TBEGIN)
14887 /* Emit code to set TARGET to true or false depending on
14888 whether the tbegin. instruction succeeded or failed
14889 to start a transaction. We do this by placing the 1's
14890 complement of CR's EQ bit into TARGET. */
14891 rtx scratch = gen_reg_rtx (SImode);
14892 emit_insn (gen_rtx_SET (scratch,
14893 gen_rtx_EQ (SImode, cr,
14894 const0_rtx)));
14895 emit_insn (gen_rtx_SET (target,
14896 gen_rtx_XOR (SImode, scratch,
14897 GEN_INT (1))));
14899 else
14901 /* Emit code to copy the 4-bit condition register field
14902 CR into the least significant end of register TARGET. */
14903 rtx scratch1 = gen_reg_rtx (SImode);
14904 rtx scratch2 = gen_reg_rtx (SImode);
14905 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14906 emit_insn (gen_movcc (subreg, cr));
14907 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14908 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14912 if (nonvoid)
14913 return target;
14914 return const0_rtx;
14917 *expandedp = false;
14918 return NULL_RTX;
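/* A user-level sketch of the tbegin expansion above, assuming -mhtm.
   Because the expander emits the one's complement of CR0's EQ bit,
   __builtin_tbegin yields nonzero when the transaction starts and
   zero on the failure/abort path.  The function name is made up.  */

static int
try_transactional_increment (int *counter)
{
  if (__builtin_tbegin (0))
    {
      ++*counter;		/* runs transactionally */
      __builtin_tend (0);
      return 1;
    }
  return 0;			/* failed to start, or aborted */
}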
14921 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14923 static rtx
14924 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14925 rtx target)
14927 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14928 if (fcode == RS6000_BUILTIN_CPU_INIT)
14929 return const0_rtx;
14931 if (target == 0 || GET_MODE (target) != SImode)
14932 target = gen_reg_rtx (SImode);
14934 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14935 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14936 if (TREE_CODE (arg) != STRING_CST)
14938 error ("builtin %s only accepts a string argument",
14939 rs6000_builtin_info[(size_t) fcode].name);
14940 return const0_rtx;
14943 if (fcode == RS6000_BUILTIN_CPU_IS)
14945 const char *cpu = TREE_STRING_POINTER (arg);
14946 rtx cpuid = NULL_RTX;
14947 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14948 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14950 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14951 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14952 break;
14954 if (cpuid == NULL_RTX)
14956 /* Invalid CPU argument. */
14957 error ("cpu %s is an invalid argument to builtin %s",
14958 cpu, rs6000_builtin_info[(size_t) fcode].name);
14959 return const0_rtx;
14962 rtx platform = gen_reg_rtx (SImode);
14963 rtx tcbmem = gen_const_mem (SImode,
14964 gen_rtx_PLUS (Pmode,
14965 gen_rtx_REG (Pmode, TLS_REGNUM),
14966 GEN_INT (TCB_PLATFORM_OFFSET)));
14967 emit_move_insn (platform, tcbmem);
14968 emit_insn (gen_eqsi3 (target, platform, cpuid));
14970 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14972 const char *hwcap = TREE_STRING_POINTER (arg);
14973 rtx mask = NULL_RTX;
14974 int hwcap_offset;
14975 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14976 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14978 mask = GEN_INT (cpu_supports_info[i].mask);
14979 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14980 break;
14982 if (mask == NULL_RTX)
14984 /* Invalid HWCAP argument. */
14985 error ("hwcap %s is an invalid argument to builtin %s",
14986 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14987 return const0_rtx;
14990 rtx tcb_hwcap = gen_reg_rtx (SImode);
14991 rtx tcbmem = gen_const_mem (SImode,
14992 gen_rtx_PLUS (Pmode,
14993 gen_rtx_REG (Pmode, TLS_REGNUM),
14994 GEN_INT (hwcap_offset)));
14995 emit_move_insn (tcb_hwcap, tcbmem);
14996 rtx scratch1 = gen_reg_rtx (SImode);
14997 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14998 rtx scratch2 = gen_reg_rtx (SImode);
14999 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15000 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15003 /* Record that we have expanded a CPU builtin, so that we can later
15004 emit a reference to the special symbol exported by LIBC to ensure we
15005 do not link against an old LIBC that doesn't support this feature. */
15006 cpu_builtin_p = true;
15008 #else
15009 /* For old LIBCs, always return FALSE. */
15010 emit_move_insn (target, GEN_INT (0));
15011 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15013 return target;
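/* User-level usage of the CPU builtins expanded above, assuming a
   glibc new enough to publish the platform and HWCAP words in the
   TCB; with an older libc the #else branch makes the tests always
   return false.  */

#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();	/* a nop on powerpc, kept for portability */
  if (__builtin_cpu_is ("power8"))
    printf ("running on a POWER8\n");
  if (__builtin_cpu_supports ("vcrypto"))
    printf ("vector crypto available\n");
  return 0;
}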
15016 static rtx
15017 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15019 rtx pat;
15020 tree arg0 = CALL_EXPR_ARG (exp, 0);
15021 tree arg1 = CALL_EXPR_ARG (exp, 1);
15022 tree arg2 = CALL_EXPR_ARG (exp, 2);
15023 rtx op0 = expand_normal (arg0);
15024 rtx op1 = expand_normal (arg1);
15025 rtx op2 = expand_normal (arg2);
15026 machine_mode tmode = insn_data[icode].operand[0].mode;
15027 machine_mode mode0 = insn_data[icode].operand[1].mode;
15028 machine_mode mode1 = insn_data[icode].operand[2].mode;
15029 machine_mode mode2 = insn_data[icode].operand[3].mode;
15031 if (icode == CODE_FOR_nothing)
15032 /* Builtin not supported on this processor. */
15033 return 0;
15035 /* If we got invalid arguments bail out before generating bad rtl. */
15036 if (arg0 == error_mark_node
15037 || arg1 == error_mark_node
15038 || arg2 == error_mark_node)
15039 return const0_rtx;
15041 /* Check and prepare argument depending on the instruction code.
15043 Note that a switch statement instead of the sequence of tests
15044 would be incorrect as many of the CODE_FOR values could be
15045 CODE_FOR_nothing and that would yield multiple alternatives
15046 with identical values. We'd never reach here at runtime in
15047 this case. */
15048 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15049 || icode == CODE_FOR_altivec_vsldoi_v4si
15050 || icode == CODE_FOR_altivec_vsldoi_v8hi
15051 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15053 /* Only allow 4-bit unsigned literals. */
15054 STRIP_NOPS (arg2);
15055 if (TREE_CODE (arg2) != INTEGER_CST
15056 || TREE_INT_CST_LOW (arg2) & ~0xf)
15058 error ("argument 3 must be a 4-bit unsigned literal");
15059 return const0_rtx;
15062 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15063 || icode == CODE_FOR_vsx_xxpermdi_v2di
15064 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15065 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15066 || icode == CODE_FOR_vsx_xxsldwi_v4si
15067 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15068 || icode == CODE_FOR_vsx_xxsldwi_v2di
15069 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15071 /* Only allow 2-bit unsigned literals. */
15072 STRIP_NOPS (arg2);
15073 if (TREE_CODE (arg2) != INTEGER_CST
15074 || TREE_INT_CST_LOW (arg2) & ~0x3)
15076 error ("argument 3 must be a 2-bit unsigned literal");
15077 return const0_rtx;
15080 else if (icode == CODE_FOR_vsx_set_v2df
15081 || icode == CODE_FOR_vsx_set_v2di
15082 || icode == CODE_FOR_bcdadd
15083 || icode == CODE_FOR_bcdadd_lt
15084 || icode == CODE_FOR_bcdadd_eq
15085 || icode == CODE_FOR_bcdadd_gt
15086 || icode == CODE_FOR_bcdsub
15087 || icode == CODE_FOR_bcdsub_lt
15088 || icode == CODE_FOR_bcdsub_eq
15089 || icode == CODE_FOR_bcdsub_gt)
15091 /* Only allow 1-bit unsigned literals. */
15092 STRIP_NOPS (arg2);
15093 if (TREE_CODE (arg2) != INTEGER_CST
15094 || TREE_INT_CST_LOW (arg2) & ~0x1)
15096 error ("argument 3 must be a 1-bit unsigned literal");
15097 return const0_rtx;
15100 else if (icode == CODE_FOR_dfp_ddedpd_dd
15101 || icode == CODE_FOR_dfp_ddedpd_td)
15103 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15104 STRIP_NOPS (arg0);
15105 if (TREE_CODE (arg0) != INTEGER_CST
15106 || TREE_INT_CST_LOW (arg0) & ~0x3
15108 error ("argument 1 must be 0 or 2");
15109 return const0_rtx;
15112 else if (icode == CODE_FOR_dfp_denbcd_dd
15113 || icode == CODE_FOR_dfp_denbcd_td)
15115 /* Only allow 1-bit unsigned literals. */
15116 STRIP_NOPS (arg0);
15117 if (TREE_CODE (arg0) != INTEGER_CST
15118 || TREE_INT_CST_LOW (arg0) & ~0x1)
15120 error ("argument 1 must be a 1-bit unsigned literal");
15121 return const0_rtx;
15124 else if (icode == CODE_FOR_dfp_dscli_dd
15125 || icode == CODE_FOR_dfp_dscli_td
15126 || icode == CODE_FOR_dfp_dscri_dd
15127 || icode == CODE_FOR_dfp_dscri_td)
15129 /* Only allow 6-bit unsigned literals. */
15130 STRIP_NOPS (arg1);
15131 if (TREE_CODE (arg1) != INTEGER_CST
15132 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15134 error ("argument 2 must be a 6-bit unsigned literal");
15135 return const0_rtx;
15138 else if (icode == CODE_FOR_crypto_vshasigmaw
15139 || icode == CODE_FOR_crypto_vshasigmad)
15141 /* Check whether the 2nd and 3rd arguments are integer constants and in
15142 range and prepare arguments. */
15143 STRIP_NOPS (arg1);
15144 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15146 error ("argument 2 must be 0 or 1");
15147 return const0_rtx;
15150 STRIP_NOPS (arg2);
15151 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15153 error ("argument 3 must be in the range 0..15");
15154 return const0_rtx;
15158 if (target == 0
15159 || GET_MODE (target) != tmode
15160 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15161 target = gen_reg_rtx (tmode);
15163 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15164 op0 = copy_to_mode_reg (mode0, op0);
15165 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15166 op1 = copy_to_mode_reg (mode1, op1);
15167 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15168 op2 = copy_to_mode_reg (mode2, op2);
15170 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15171 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15172 else
15173 pat = GEN_FCN (icode) (target, op0, op1, op2);
15174 if (! pat)
15175 return 0;
15176 emit_insn (pat);
15178 return target;
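/* The literal checks above all follow one pattern: a constant fits an
   n-bit unsigned field iff masking away its low n bits leaves zero.
   A host-side restatement (illustrative only):  */

static int
fits_unsigned_literal (unsigned long long val, unsigned nbits)
{
  return (val & ~((1ULL << nbits) - 1)) == 0;	/* nbits == 4: & ~0xf */
}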
15181 /* Expand the lvx builtins. */
15182 static rtx
15183 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15185 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15186 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15187 tree arg0;
15188 machine_mode tmode, mode0;
15189 rtx pat, op0;
15190 enum insn_code icode;
15192 switch (fcode)
15194 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15195 icode = CODE_FOR_vector_altivec_load_v16qi;
15196 break;
15197 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15198 icode = CODE_FOR_vector_altivec_load_v8hi;
15199 break;
15200 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15201 icode = CODE_FOR_vector_altivec_load_v4si;
15202 break;
15203 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15204 icode = CODE_FOR_vector_altivec_load_v4sf;
15205 break;
15206 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15207 icode = CODE_FOR_vector_altivec_load_v2df;
15208 break;
15209 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15210 icode = CODE_FOR_vector_altivec_load_v2di;
15211 break;
15212 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15213 icode = CODE_FOR_vector_altivec_load_v1ti;
15214 break;
15215 default:
15216 *expandedp = false;
15217 return NULL_RTX;
15220 *expandedp = true;
15222 arg0 = CALL_EXPR_ARG (exp, 0);
15223 op0 = expand_normal (arg0);
15224 tmode = insn_data[icode].operand[0].mode;
15225 mode0 = insn_data[icode].operand[1].mode;
15227 if (target == 0
15228 || GET_MODE (target) != tmode
15229 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15230 target = gen_reg_rtx (tmode);
15232 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15235 pat = GEN_FCN (icode) (target, op0);
15236 if (! pat)
15237 return 0;
15238 emit_insn (pat);
15239 return target;
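/* A hypothetical source-level example of an AltiVec vector load,
   assuming <altivec.h> and -maltivec.  (vec_ld itself resolves to the
   LVX builtins expanded further below; the LD_INTERNAL variants above
   lower to the same underlying lvx access.)  */

#include <altivec.h>

vector int
load_quadword (const int *p)
{
  return vec_ld (0, p);		/* lvx: loads from (p + 0) & ~15 */
}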
15242 /* Expand the stvx builtins. */
15243 static rtx
15244 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15245 bool *expandedp)
15247 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15248 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15249 tree arg0, arg1;
15250 machine_mode mode0, mode1;
15251 rtx pat, op0, op1;
15252 enum insn_code icode;
15254 switch (fcode)
15256 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15257 icode = CODE_FOR_vector_altivec_store_v16qi;
15258 break;
15259 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15260 icode = CODE_FOR_vector_altivec_store_v8hi;
15261 break;
15262 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15263 icode = CODE_FOR_vector_altivec_store_v4si;
15264 break;
15265 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15266 icode = CODE_FOR_vector_altivec_store_v4sf;
15267 break;
15268 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15269 icode = CODE_FOR_vector_altivec_store_v2df;
15270 break;
15271 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15272 icode = CODE_FOR_vector_altivec_store_v2di;
15273 break;
15274 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15275 icode = CODE_FOR_vector_altivec_store_v1ti;
15276 break;
15277 default:
15278 *expandedp = false;
15279 return NULL_RTX;
15282 arg0 = CALL_EXPR_ARG (exp, 0);
15283 arg1 = CALL_EXPR_ARG (exp, 1);
15284 op0 = expand_normal (arg0);
15285 op1 = expand_normal (arg1);
15286 mode0 = insn_data[icode].operand[0].mode;
15287 mode1 = insn_data[icode].operand[1].mode;
15289 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15290 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15291 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15292 op1 = copy_to_mode_reg (mode1, op1);
15294 pat = GEN_FCN (icode) (op0, op1);
15295 if (pat)
15296 emit_insn (pat);
15298 *expandedp = true;
15299 return NULL_RTX;
15302 /* Expand the dst builtins. */
15303 static rtx
15304 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15305 bool *expandedp)
15307 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15308 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15309 tree arg0, arg1, arg2;
15310 machine_mode mode0, mode1;
15311 rtx pat, op0, op1, op2;
15312 const struct builtin_description *d;
15313 size_t i;
15315 *expandedp = false;
15317 /* Handle DST variants. */
15318 d = bdesc_dst;
15319 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15320 if (d->code == fcode)
15322 arg0 = CALL_EXPR_ARG (exp, 0);
15323 arg1 = CALL_EXPR_ARG (exp, 1);
15324 arg2 = CALL_EXPR_ARG (exp, 2);
15325 op0 = expand_normal (arg0);
15326 op1 = expand_normal (arg1);
15327 op2 = expand_normal (arg2);
15328 mode0 = insn_data[d->icode].operand[0].mode;
15329 mode1 = insn_data[d->icode].operand[1].mode;
15331 /* Invalid arguments, bail out before generating bad rtl. */
15332 if (arg0 == error_mark_node
15333 || arg1 == error_mark_node
15334 || arg2 == error_mark_node)
15335 return const0_rtx;
15337 *expandedp = true;
15338 STRIP_NOPS (arg2);
15339 if (TREE_CODE (arg2) != INTEGER_CST
15340 || TREE_INT_CST_LOW (arg2) & ~0x3)
15342 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15343 return const0_rtx;
15346 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15347 op0 = copy_to_mode_reg (Pmode, op0);
15348 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15349 op1 = copy_to_mode_reg (mode1, op1);
15351 pat = GEN_FCN (d->icode) (op0, op1, op2);
15352 if (pat != 0)
15353 emit_insn (pat);
15355 return NULL_RTX;
15358 return NULL_RTX;
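/* A hypothetical use of the data-stream touch builtins handled above,
   assuming <altivec.h>; the third argument is the 2-bit tag literal
   that the code above validates.  */

#include <altivec.h>

void
start_prefetch_stream (const int *p, int control)
{
  vec_dst (p, control, 0);	/* the tag (0) must be a 2-bit literal */
}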
15361 /* Expand vec_init builtin. */
15362 static rtx
15363 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15365 machine_mode tmode = TYPE_MODE (type);
15366 machine_mode inner_mode = GET_MODE_INNER (tmode);
15367 int i, n_elt = GET_MODE_NUNITS (tmode);
15369 gcc_assert (VECTOR_MODE_P (tmode));
15370 gcc_assert (n_elt == call_expr_nargs (exp));
15372 if (!target || !register_operand (target, tmode))
15373 target = gen_reg_rtx (tmode);
15375 /* If we have a vector comprised of a single element, such as V1TImode, do
15376 the initialization directly. */
15377 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15379 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15380 emit_move_insn (target, gen_lowpart (tmode, x));
15382 else
15384 rtvec v = rtvec_alloc (n_elt);
15386 for (i = 0; i < n_elt; ++i)
15388 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15389 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15392 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15395 return target;
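/* A hypothetical source form that reaches the vec_init expander
   above, assuming -maltivec: a vector constructor supplying exactly
   one expression per element, matching the n_elt == nargs assert.  */

#include <altivec.h>

vector int
make_v4si (int a, int b, int c, int d)
{
  return (vector int) { a, b, c, d };	/* four args for V4SImode */
}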
15398 /* Return the integer constant in ARG. Constrain it to be in the range
15399 of the subparts of VEC_TYPE; issue an error if not. */
15401 static int
15402 get_element_number (tree vec_type, tree arg)
15404 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15406 if (!tree_fits_uhwi_p (arg)
15407 || (elt = tree_to_uhwi (arg), elt > max))
15409 error ("selector must be an integer constant in the range 0..%wi", max);
15410 return 0;
15413 return elt;
15416 /* Expand vec_set builtin. */
15417 static rtx
15418 altivec_expand_vec_set_builtin (tree exp)
15420 machine_mode tmode, mode1;
15421 tree arg0, arg1, arg2;
15422 int elt;
15423 rtx op0, op1;
15425 arg0 = CALL_EXPR_ARG (exp, 0);
15426 arg1 = CALL_EXPR_ARG (exp, 1);
15427 arg2 = CALL_EXPR_ARG (exp, 2);
15429 tmode = TYPE_MODE (TREE_TYPE (arg0));
15430 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15431 gcc_assert (VECTOR_MODE_P (tmode));
15433 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15434 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15435 elt = get_element_number (TREE_TYPE (arg0), arg2);
15437 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15438 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15440 op0 = force_reg (tmode, op0);
15441 op1 = force_reg (mode1, op1);
15443 rs6000_expand_vector_set (op0, op1, elt);
15445 return op0;
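/* vec_insert from <altivec.h> is one route into the vec_set expander
   above; the selector must be an in-range constant, checked by
   get_element_number.  A hypothetical example:  */

#include <altivec.h>

vector int
set_element_two (vector int v, int x)
{
  return vec_insert (x, v, 2);	/* replaces element 2 */
}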
15448 /* Expand vec_ext builtin. */
15449 static rtx
15450 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15452 machine_mode tmode, mode0;
15453 tree arg0, arg1;
15454 rtx op0;
15455 rtx op1;
15457 arg0 = CALL_EXPR_ARG (exp, 0);
15458 arg1 = CALL_EXPR_ARG (exp, 1);
15460 op0 = expand_normal (arg0);
15461 op1 = expand_normal (arg1);
15463 /* Call get_element_number to validate arg1 if it is a constant. */
15464 if (TREE_CODE (arg1) == INTEGER_CST)
15465 (void) get_element_number (TREE_TYPE (arg0), arg1);
15467 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15468 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15469 gcc_assert (VECTOR_MODE_P (mode0));
15471 op0 = force_reg (mode0, op0);
15473 if (optimize || !target || !register_operand (target, tmode))
15474 target = gen_reg_rtx (tmode);
15476 rs6000_expand_vector_extract (target, op0, op1);
15478 return target;
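/* The matching extract direction, again assuming <altivec.h>.  With a
   constant selector the range check above applies; a variable selector
   is also accepted and handled by rs6000_expand_vector_extract.  */

#include <altivec.h>

int
get_element_three (vector int v)
{
  return vec_extract (v, 3);	/* element 3 of 4 */
}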
15481 /* Expand the builtin in EXP and store the result in TARGET. Store
15482 true in *EXPANDEDP if we found a builtin to expand. */
15483 static rtx
15484 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15486 const struct builtin_description *d;
15487 size_t i;
15488 enum insn_code icode;
15489 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15490 tree arg0;
15491 rtx op0, pat;
15492 machine_mode tmode, mode0;
15493 enum rs6000_builtins fcode
15494 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15496 if (rs6000_overloaded_builtin_p (fcode))
15498 *expandedp = true;
15499 error ("unresolved overload for Altivec builtin %qF", fndecl);
15501 /* Given it is invalid, just generate a normal call. */
15502 return expand_call (exp, target, false);
15505 target = altivec_expand_ld_builtin (exp, target, expandedp);
15506 if (*expandedp)
15507 return target;
15509 target = altivec_expand_st_builtin (exp, target, expandedp);
15510 if (*expandedp)
15511 return target;
15513 target = altivec_expand_dst_builtin (exp, target, expandedp);
15514 if (*expandedp)
15515 return target;
15517 *expandedp = true;
15519 switch (fcode)
15521 case ALTIVEC_BUILTIN_STVX_V2DF:
15522 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15523 case ALTIVEC_BUILTIN_STVX_V2DI:
15524 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15525 case ALTIVEC_BUILTIN_STVX_V4SF:
15526 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15527 case ALTIVEC_BUILTIN_STVX:
15528 case ALTIVEC_BUILTIN_STVX_V4SI:
15529 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15530 case ALTIVEC_BUILTIN_STVX_V8HI:
15531 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15532 case ALTIVEC_BUILTIN_STVX_V16QI:
15533 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15534 case ALTIVEC_BUILTIN_STVEBX:
15535 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15536 case ALTIVEC_BUILTIN_STVEHX:
15537 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15538 case ALTIVEC_BUILTIN_STVEWX:
15539 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15540 case ALTIVEC_BUILTIN_STVXL_V2DF:
15541 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15542 case ALTIVEC_BUILTIN_STVXL_V2DI:
15543 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15544 case ALTIVEC_BUILTIN_STVXL_V4SF:
15545 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15546 case ALTIVEC_BUILTIN_STVXL:
15547 case ALTIVEC_BUILTIN_STVXL_V4SI:
15548 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15549 case ALTIVEC_BUILTIN_STVXL_V8HI:
15550 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15551 case ALTIVEC_BUILTIN_STVXL_V16QI:
15552 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15554 case ALTIVEC_BUILTIN_STVLX:
15555 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15556 case ALTIVEC_BUILTIN_STVLXL:
15557 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15558 case ALTIVEC_BUILTIN_STVRX:
15559 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15560 case ALTIVEC_BUILTIN_STVRXL:
15561 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15563 case P9V_BUILTIN_STXVL:
15564 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15566 case VSX_BUILTIN_STXVD2X_V1TI:
15567 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15568 case VSX_BUILTIN_STXVD2X_V2DF:
15569 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15570 case VSX_BUILTIN_STXVD2X_V2DI:
15571 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15572 case VSX_BUILTIN_STXVW4X_V4SF:
15573 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15574 case VSX_BUILTIN_STXVW4X_V4SI:
15575 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15576 case VSX_BUILTIN_STXVW4X_V8HI:
15577 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15578 case VSX_BUILTIN_STXVW4X_V16QI:
15579 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15581 /* For the following on big endian, it's ok to use any appropriate
15582 unaligned-supporting store, so use a generic expander. For
15583 little-endian, the exact element-reversing instruction must
15584 be used. */
15585 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15587 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15588 : CODE_FOR_vsx_st_elemrev_v2df);
15589 return altivec_expand_stv_builtin (code, exp);
15591 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15593 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15594 : CODE_FOR_vsx_st_elemrev_v2di);
15595 return altivec_expand_stv_builtin (code, exp);
15597 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15599 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15600 : CODE_FOR_vsx_st_elemrev_v4sf);
15601 return altivec_expand_stv_builtin (code, exp);
15603 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15605 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15606 : CODE_FOR_vsx_st_elemrev_v4si);
15607 return altivec_expand_stv_builtin (code, exp);
15609 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15611 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15612 : CODE_FOR_vsx_st_elemrev_v8hi);
15613 return altivec_expand_stv_builtin (code, exp);
15615 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15617 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15618 : CODE_FOR_vsx_st_elemrev_v16qi);
15619 return altivec_expand_stv_builtin (code, exp);
15622 case ALTIVEC_BUILTIN_MFVSCR:
15623 icode = CODE_FOR_altivec_mfvscr;
15624 tmode = insn_data[icode].operand[0].mode;
15626 if (target == 0
15627 || GET_MODE (target) != tmode
15628 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15629 target = gen_reg_rtx (tmode);
15631 pat = GEN_FCN (icode) (target);
15632 if (! pat)
15633 return 0;
15634 emit_insn (pat);
15635 return target;
15637 case ALTIVEC_BUILTIN_MTVSCR:
15638 icode = CODE_FOR_altivec_mtvscr;
15639 arg0 = CALL_EXPR_ARG (exp, 0);
15640 op0 = expand_normal (arg0);
15641 mode0 = insn_data[icode].operand[0].mode;
15643 /* If we got invalid arguments bail out before generating bad rtl. */
15644 if (arg0 == error_mark_node)
15645 return const0_rtx;
15647 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15648 op0 = copy_to_mode_reg (mode0, op0);
15650 pat = GEN_FCN (icode) (op0);
15651 if (pat)
15652 emit_insn (pat);
15653 return NULL_RTX;
15655 case ALTIVEC_BUILTIN_DSSALL:
15656 emit_insn (gen_altivec_dssall ());
15657 return NULL_RTX;
15659 case ALTIVEC_BUILTIN_DSS:
15660 icode = CODE_FOR_altivec_dss;
15661 arg0 = CALL_EXPR_ARG (exp, 0);
15662 STRIP_NOPS (arg0);
15663 op0 = expand_normal (arg0);
15664 mode0 = insn_data[icode].operand[0].mode;
15666 /* If we got invalid arguments bail out before generating bad rtl. */
15667 if (arg0 == error_mark_node)
15668 return const0_rtx;
15670 if (TREE_CODE (arg0) != INTEGER_CST
15671 || TREE_INT_CST_LOW (arg0) & ~0x3)
15673 error ("argument to dss must be a 2-bit unsigned literal");
15674 return const0_rtx;
15677 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15678 op0 = copy_to_mode_reg (mode0, op0);
15680 emit_insn (gen_altivec_dss (op0));
15681 return NULL_RTX;
15683 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15684 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15685 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15686 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15687 case VSX_BUILTIN_VEC_INIT_V2DF:
15688 case VSX_BUILTIN_VEC_INIT_V2DI:
15689 case VSX_BUILTIN_VEC_INIT_V1TI:
15690 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15692 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15693 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15694 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15695 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15696 case VSX_BUILTIN_VEC_SET_V2DF:
15697 case VSX_BUILTIN_VEC_SET_V2DI:
15698 case VSX_BUILTIN_VEC_SET_V1TI:
15699 return altivec_expand_vec_set_builtin (exp);
15701 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15702 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15703 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15704 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15705 case VSX_BUILTIN_VEC_EXT_V2DF:
15706 case VSX_BUILTIN_VEC_EXT_V2DI:
15707 case VSX_BUILTIN_VEC_EXT_V1TI:
15708 return altivec_expand_vec_ext_builtin (exp, target);
15710 default:
15711 break;
15712 /* Fall through to the table-driven expansions below. */
15715 /* Expand abs* operations. */
15716 d = bdesc_abs;
15717 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15718 if (d->code == fcode)
15719 return altivec_expand_abs_builtin (d->icode, exp, target);
15721 /* Expand the AltiVec predicates. */
15722 d = bdesc_altivec_preds;
15723 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15724 if (d->code == fcode)
15725 return altivec_expand_predicate_builtin (d->icode, exp, target);
15727 /* LV* are funky. We initialized them differently. */
15728 switch (fcode)
15730 case ALTIVEC_BUILTIN_LVSL:
15731 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15732 exp, target, false);
15733 case ALTIVEC_BUILTIN_LVSR:
15734 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15735 exp, target, false);
15736 case ALTIVEC_BUILTIN_LVEBX:
15737 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15738 exp, target, false);
15739 case ALTIVEC_BUILTIN_LVEHX:
15740 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15741 exp, target, false);
15742 case ALTIVEC_BUILTIN_LVEWX:
15743 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15744 exp, target, false);
15745 case ALTIVEC_BUILTIN_LVXL_V2DF:
15746 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15747 exp, target, false);
15748 case ALTIVEC_BUILTIN_LVXL_V2DI:
15749 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15750 exp, target, false);
15751 case ALTIVEC_BUILTIN_LVXL_V4SF:
15752 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15753 exp, target, false);
15754 case ALTIVEC_BUILTIN_LVXL:
15755 case ALTIVEC_BUILTIN_LVXL_V4SI:
15756 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15757 exp, target, false);
15758 case ALTIVEC_BUILTIN_LVXL_V8HI:
15759 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15760 exp, target, false);
15761 case ALTIVEC_BUILTIN_LVXL_V16QI:
15762 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15763 exp, target, false);
15764 case ALTIVEC_BUILTIN_LVX_V2DF:
15765 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15766 exp, target, false);
15767 case ALTIVEC_BUILTIN_LVX_V2DI:
15768 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15769 exp, target, false);
15770 case ALTIVEC_BUILTIN_LVX_V4SF:
15771 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15772 exp, target, false);
15773 case ALTIVEC_BUILTIN_LVX:
15774 case ALTIVEC_BUILTIN_LVX_V4SI:
15775 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15776 exp, target, false);
15777 case ALTIVEC_BUILTIN_LVX_V8HI:
15778 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15779 exp, target, false);
15780 case ALTIVEC_BUILTIN_LVX_V16QI:
15781 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15782 exp, target, false);
15783 case ALTIVEC_BUILTIN_LVLX:
15784 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15785 exp, target, true);
15786 case ALTIVEC_BUILTIN_LVLXL:
15787 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15788 exp, target, true);
15789 case ALTIVEC_BUILTIN_LVRX:
15790 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15791 exp, target, true);
15792 case ALTIVEC_BUILTIN_LVRXL:
15793 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15794 exp, target, true);
15795 case VSX_BUILTIN_LXVD2X_V1TI:
15796 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15797 exp, target, false);
15798 case VSX_BUILTIN_LXVD2X_V2DF:
15799 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15800 exp, target, false);
15801 case VSX_BUILTIN_LXVD2X_V2DI:
15802 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15803 exp, target, false);
15804 case VSX_BUILTIN_LXVW4X_V4SF:
15805 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15806 exp, target, false);
15807 case VSX_BUILTIN_LXVW4X_V4SI:
15808 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15809 exp, target, false);
15810 case VSX_BUILTIN_LXVW4X_V8HI:
15811 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15812 exp, target, false);
15813 case VSX_BUILTIN_LXVW4X_V16QI:
15814 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15815 exp, target, false);
15816 /* For the following on big endian, it's ok to use any appropriate
15817 unaligned-supporting load, so use a generic expander. For
15818 little-endian, the exact element-reversing instruction must
15819 be used. */
15820 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15822 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15823 : CODE_FOR_vsx_ld_elemrev_v2df);
15824 return altivec_expand_lv_builtin (code, exp, target, false);
15826 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15828 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15829 : CODE_FOR_vsx_ld_elemrev_v2di);
15830 return altivec_expand_lv_builtin (code, exp, target, false);
15832 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15834 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15835 : CODE_FOR_vsx_ld_elemrev_v4sf);
15836 return altivec_expand_lv_builtin (code, exp, target, false);
15838 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15840 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15841 : CODE_FOR_vsx_ld_elemrev_v4si);
15842 return altivec_expand_lv_builtin (code, exp, target, false);
15844 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15846 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15847 : CODE_FOR_vsx_ld_elemrev_v8hi);
15848 return altivec_expand_lv_builtin (code, exp, target, false);
15850 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15852 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15853 : CODE_FOR_vsx_ld_elemrev_v16qi);
15854 return altivec_expand_lv_builtin (code, exp, target, false);
15856 break;
15857 default:
15858 break;
15859 /* Fall through to the not-expanded return below. */
15862 *expandedp = false;
15863 return NULL_RTX;
15866 /* Expand the builtin in EXP and store the result in TARGET. Store
15867 true in *EXPANDEDP if we found a builtin to expand. */
15868 static rtx
15869 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15871 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15872 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15873 const struct builtin_description *d;
15874 size_t i;
15876 *expandedp = true;
15878 switch (fcode)
15880 case PAIRED_BUILTIN_STX:
15881 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15882 case PAIRED_BUILTIN_LX:
15883 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15884 default:
15885 break;
15886 /* Fall through to the predicate expansions below. */
15889 /* Expand the paired predicates. */
15890 d = bdesc_paired_preds;
15891 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15892 if (d->code == fcode)
15893 return paired_expand_predicate_builtin (d->icode, exp, target);
15895 *expandedp = false;
15896 return NULL_RTX;
15899 /* Binops that need to be initialized manually, but can be expanded
15900 automagically by rs6000_expand_binop_builtin. */
15901 static const struct builtin_description bdesc_2arg_spe[] =
15903 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15904 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15905 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15906 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15907 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15908 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15909 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15910 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15911 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15912 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15913 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15914 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15915 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15916 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15917 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15918 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15919 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15920 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15921 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15922 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15923 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15924 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15927 /* Expand the builtin in EXP and store the result in TARGET. Store
15928 true in *EXPANDEDP if we found a builtin to expand.
15930 This expands the SPE builtins that are not simple unary and binary
15931 operations. */
15932 static rtx
15933 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15935 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15936 tree arg1, arg0;
15937 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15938 enum insn_code icode;
15939 machine_mode tmode, mode0;
15940 rtx pat, op0;
15941 const struct builtin_description *d;
15942 size_t i;
15944 *expandedp = true;
15946 /* Syntax check for a 5-bit unsigned immediate. */
15947 switch (fcode)
15949 case SPE_BUILTIN_EVSTDD:
15950 case SPE_BUILTIN_EVSTDH:
15951 case SPE_BUILTIN_EVSTDW:
15952 case SPE_BUILTIN_EVSTWHE:
15953 case SPE_BUILTIN_EVSTWHO:
15954 case SPE_BUILTIN_EVSTWWE:
15955 case SPE_BUILTIN_EVSTWWO:
15956 arg1 = CALL_EXPR_ARG (exp, 2);
15957 if (TREE_CODE (arg1) != INTEGER_CST
15958 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15960 error ("argument 2 must be a 5-bit unsigned literal");
15961 return const0_rtx;
15963 break;
15964 default:
15965 break;
15968 /* The evsplat*i instructions are not quite generic. */
15969 switch (fcode)
15971 case SPE_BUILTIN_EVSPLATFI:
15972 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15973 exp, target);
15974 case SPE_BUILTIN_EVSPLATI:
15975 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15976 exp, target);
15977 default:
15978 break;
15981 d = bdesc_2arg_spe;
15982 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15983 if (d->code == fcode)
15984 return rs6000_expand_binop_builtin (d->icode, exp, target);
15986 d = bdesc_spe_predicates;
15987 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15988 if (d->code == fcode)
15989 return spe_expand_predicate_builtin (d->icode, exp, target);
15991 d = bdesc_spe_evsel;
15992 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15993 if (d->code == fcode)
15994 return spe_expand_evsel_builtin (d->icode, exp, target);
15996 switch (fcode)
15998 case SPE_BUILTIN_EVSTDDX:
15999 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16000 case SPE_BUILTIN_EVSTDHX:
16001 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16002 case SPE_BUILTIN_EVSTDWX:
16003 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16004 case SPE_BUILTIN_EVSTWHEX:
16005 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16006 case SPE_BUILTIN_EVSTWHOX:
16007 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16008 case SPE_BUILTIN_EVSTWWEX:
16009 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16010 case SPE_BUILTIN_EVSTWWOX:
16011 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16012 case SPE_BUILTIN_EVSTDD:
16013 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16014 case SPE_BUILTIN_EVSTDH:
16015 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16016 case SPE_BUILTIN_EVSTDW:
16017 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16018 case SPE_BUILTIN_EVSTWHE:
16019 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16020 case SPE_BUILTIN_EVSTWHO:
16021 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16022 case SPE_BUILTIN_EVSTWWE:
16023 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16024 case SPE_BUILTIN_EVSTWWO:
16025 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16026 case SPE_BUILTIN_MFSPEFSCR:
16027 icode = CODE_FOR_spe_mfspefscr;
16028 tmode = insn_data[icode].operand[0].mode;
16030 if (target == 0
16031 || GET_MODE (target) != tmode
16032 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16033 target = gen_reg_rtx (tmode);
16035 pat = GEN_FCN (icode) (target);
16036 if (! pat)
16037 return 0;
16038 emit_insn (pat);
16039 return target;
16040 case SPE_BUILTIN_MTSPEFSCR:
16041 icode = CODE_FOR_spe_mtspefscr;
16042 arg0 = CALL_EXPR_ARG (exp, 0);
16043 op0 = expand_normal (arg0);
16044 mode0 = insn_data[icode].operand[0].mode;
16046 if (arg0 == error_mark_node)
16047 return const0_rtx;
16049 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16050 op0 = copy_to_mode_reg (mode0, op0);
16052 pat = GEN_FCN (icode) (op0);
16053 if (pat)
16054 emit_insn (pat);
16055 return NULL_RTX;
16056 default:
16057 break;
16060 *expandedp = false;
16061 return NULL_RTX;
16064 static rtx
16065 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16067 rtx pat, scratch, tmp;
16068 tree form = CALL_EXPR_ARG (exp, 0);
16069 tree arg0 = CALL_EXPR_ARG (exp, 1);
16070 tree arg1 = CALL_EXPR_ARG (exp, 2);
16071 rtx op0 = expand_normal (arg0);
16072 rtx op1 = expand_normal (arg1);
16073 machine_mode mode0 = insn_data[icode].operand[1].mode;
16074 machine_mode mode1 = insn_data[icode].operand[2].mode;
16075 int form_int;
16076 enum rtx_code code;
16078 if (TREE_CODE (form) != INTEGER_CST)
16080 error ("argument 1 of __builtin_paired_predicate must be a constant");
16081 return const0_rtx;
16083 else
16084 form_int = TREE_INT_CST_LOW (form);
16086 gcc_assert (mode0 == mode1);
16088 if (arg0 == error_mark_node || arg1 == error_mark_node)
16089 return const0_rtx;
16091 if (target == 0
16092 || GET_MODE (target) != SImode
16093 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16094 target = gen_reg_rtx (SImode);
16095 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16096 op0 = copy_to_mode_reg (mode0, op0);
16097 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16098 op1 = copy_to_mode_reg (mode1, op1);
16100 scratch = gen_reg_rtx (CCFPmode);
16102 pat = GEN_FCN (icode) (scratch, op0, op1);
16103 if (!pat)
16104 return const0_rtx;
16106 emit_insn (pat);
16108 switch (form_int)
16110 /* LT bit. */
16111 case 0:
16112 code = LT;
16113 break;
16114 /* GT bit. */
16115 case 1:
16116 code = GT;
16117 break;
16118 /* EQ bit. */
16119 case 2:
16120 code = EQ;
16121 break;
16122 /* UN bit. */
16123 case 3:
16124 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16125 return target;
16126 default:
16127 error ("argument 1 of __builtin_paired_predicate is out of range");
16128 return const0_rtx;
16131 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16132 emit_move_insn (target, tmp);
16133 return target;
16136 static rtx
16137 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16139 rtx pat, scratch, tmp;
16140 tree form = CALL_EXPR_ARG (exp, 0);
16141 tree arg0 = CALL_EXPR_ARG (exp, 1);
16142 tree arg1 = CALL_EXPR_ARG (exp, 2);
16143 rtx op0 = expand_normal (arg0);
16144 rtx op1 = expand_normal (arg1);
16145 machine_mode mode0 = insn_data[icode].operand[1].mode;
16146 machine_mode mode1 = insn_data[icode].operand[2].mode;
16147 int form_int;
16148 enum rtx_code code;
16150 if (TREE_CODE (form) != INTEGER_CST)
16152 error ("argument 1 of __builtin_spe_predicate must be a constant");
16153 return const0_rtx;
16155 else
16156 form_int = TREE_INT_CST_LOW (form);
16158 gcc_assert (mode0 == mode1);
16160 if (arg0 == error_mark_node || arg1 == error_mark_node)
16161 return const0_rtx;
16163 if (target == 0
16164 || GET_MODE (target) != SImode
16165 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16166 target = gen_reg_rtx (SImode);
16168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16169 op0 = copy_to_mode_reg (mode0, op0);
16170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16171 op1 = copy_to_mode_reg (mode1, op1);
16173 scratch = gen_reg_rtx (CCmode);
16175 pat = GEN_FCN (icode) (scratch, op0, op1);
16176 if (! pat)
16177 return const0_rtx;
16178 emit_insn (pat);
16180 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16181 _lower_. We use one compare, but look in different bits of the
16182 CR for each variant.
16184 There are 2 elements in each SPE simd type (upper/lower). The CR
16185 bits are set as follows:
16187 BIT0 | BIT 1 | BIT 2 | BIT 3
16188 U | L | (U | L) | (U & L)
16190 So, for an "all" relationship, BIT 3 would be set.
16191 For an "any" relationship, BIT 2 would be set. Etc.
16193 Following traditional nomenclature, these bits map to:
16195 BIT 0 | BIT 1 | BIT 2 | BIT 3
16196 LT | GT | EQ | OV
16198 Later, we will generate rtl to look in the OV/EQ/LT/GT bit selected by the form (all/any/upper/lower respectively).
16201 switch (form_int)
16203 /* All variant. OV bit. */
16204 case 0:
16205 /* We need to get to the OV bit, which is the ORDERED bit. We
16206 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16207 that's ugly and will make validate_condition_mode die.
16208 So let's just use another pattern. */
16209 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16210 return target;
16211 /* Any variant. EQ bit. */
16212 case 1:
16213 code = EQ;
16214 break;
16215 /* Upper variant. LT bit. */
16216 case 2:
16217 code = LT;
16218 break;
16219 /* Lower variant. GT bit. */
16220 case 3:
16221 code = GT;
16222 break;
16223 default:
16224 error ("argument 1 of __builtin_spe_predicate is out of range");
16225 return const0_rtx;
16228 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16229 emit_move_insn (target, tmp);
16231 return target;
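/* Hypothetical user-level sketch (the builtin spelling below is assumed
   for illustration, not taken from this file): with an SPE compare
   builtin __builtin_spe_evcmpeq, form 1 selects the EQ ("any") bit:

     __ev64_opaque__ a, b;
     int any_eq = __builtin_spe_evcmpeq (1, a, b);

   Form 0 tests "all" via the OV bit, and forms 2 and 3 test only the
   upper or lower element via the LT and GT bits.  */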
16234 /* The evsel builtins look like this:
16236 e = __builtin_spe_evsel_OP (a, b, c, d);
16238 and work like this:
16240 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16241 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16244 static rtx
16245 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16247 rtx pat, scratch;
16248 tree arg0 = CALL_EXPR_ARG (exp, 0);
16249 tree arg1 = CALL_EXPR_ARG (exp, 1);
16250 tree arg2 = CALL_EXPR_ARG (exp, 2);
16251 tree arg3 = CALL_EXPR_ARG (exp, 3);
16252 rtx op0 = expand_normal (arg0);
16253 rtx op1 = expand_normal (arg1);
16254 rtx op2 = expand_normal (arg2);
16255 rtx op3 = expand_normal (arg3);
16256 machine_mode mode0 = insn_data[icode].operand[1].mode;
16257 machine_mode mode1 = insn_data[icode].operand[2].mode;
16259 gcc_assert (mode0 == mode1);
16261 if (arg0 == error_mark_node || arg1 == error_mark_node
16262 || arg2 == error_mark_node || arg3 == error_mark_node)
16263 return const0_rtx;
16265 if (target == 0
16266 || GET_MODE (target) != mode0
16267 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16268 target = gen_reg_rtx (mode0);
16270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16271 op0 = copy_to_mode_reg (mode0, op0);
16272 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16273 op1 = copy_to_mode_reg (mode0, op1);
16274 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16275 op2 = copy_to_mode_reg (mode0, op2);
16276 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16277 op3 = copy_to_mode_reg (mode0, op3);
16279 /* Generate the compare. */
16280 scratch = gen_reg_rtx (CCmode);
16281 pat = GEN_FCN (icode) (scratch, op0, op1);
16282 if (! pat)
16283 return const0_rtx;
16284 emit_insn (pat);
16286 if (mode0 == V2SImode)
16287 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16288 else
16289 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16291 return target;
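/* Worked example of the evsel semantics from the comment above (the
   _gts spelling is illustrative): with a = {1, 5} and b = {2, 2},

     e = __builtin_spe_evsel_gts (a, b, c, d);

   gives e[upper] = (1 > 2) ? c[upper] : d[upper], i.e. d[upper], and
   e[lower] = (5 > 2) ? c[lower] : d[lower], i.e. c[lower].  */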
16294 /* Raise an error message for a builtin function that is called without the
16295 appropriate target options being set. */
16297 static void
16298 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16300 size_t uns_fncode = (size_t)fncode;
16301 const char *name = rs6000_builtin_info[uns_fncode].name;
16302 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16304 gcc_assert (name != NULL);
16305 if ((fnmask & RS6000_BTM_CELL) != 0)
16306 error ("Builtin function %s is only valid for the cell processor", name);
16307 else if ((fnmask & RS6000_BTM_VSX) != 0)
16308 error ("Builtin function %s requires the -mvsx option", name);
16309 else if ((fnmask & RS6000_BTM_HTM) != 0)
16310 error ("Builtin function %s requires the -mhtm option", name);
16311 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16312 error ("Builtin function %s requires the -maltivec option", name);
16313 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16314 error ("Builtin function %s requires the -mpaired option", name);
16315 else if ((fnmask & RS6000_BTM_SPE) != 0)
16316 error ("Builtin function %s requires the -mspe option", name);
16317 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16318 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16319 error ("Builtin function %s requires the -mhard-dfp and"
16320 " -mpower8-vector options", name);
16321 else if ((fnmask & RS6000_BTM_DFP) != 0)
16322 error ("Builtin function %s requires the -mhard-dfp option", name);
16323 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16324 error ("Builtin function %s requires the -mpower8-vector option", name);
16325 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16326 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16327 error ("Builtin function %s requires the -mcpu=power9 and"
16328 " -m64 options", name);
16329 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16330 error ("Builtin function %s requires the -mcpu=power9 option", name);
16331 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16332 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16333 error ("Builtin function %s requires the -mcpu=power9 and"
16334 " -m64 options", name);
16335 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16336 error ("Builtin function %s requires the -mcpu=power9 option", name);
16337 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16338 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16339 error ("Builtin function %s requires the -mhard-float and"
16340 " -mlong-double-128 options", name);
16341 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16342 error ("Builtin function %s requires the -mhard-float option", name);
16343 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16344 error ("Builtin function %s requires the -mfloat128 option", name);
16345 else
16346 error ("Builtin function %s is not supported with the current options",
16347 name);
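/* Illustrative diagnostic (builtin chosen as an example): calling
   __builtin_altivec_vadduwm from a function compiled without AltiVec
   support, e.g. under __attribute__ ((target ("no-altivec"))), lands in
   the RS6000_BTM_ALTIVEC arm above and produces

     error: Builtin function __builtin_altivec_vadduwm requires the
     -maltivec option  */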
16350 /* Target hook for early folding of built-ins, shamelessly stolen
16351 from ia64.c. */
16353 static tree
16354 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16355 tree *args, bool ignore ATTRIBUTE_UNUSED)
16357 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16359 enum rs6000_builtins fn_code
16360 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16361 switch (fn_code)
16363 case RS6000_BUILTIN_NANQ:
16364 case RS6000_BUILTIN_NANSQ:
16366 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16367 const char *str = c_getstr (*args);
16368 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16369 REAL_VALUE_TYPE real;
16371 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16372 return build_real (type, real);
16373 return NULL_TREE;
16375 case RS6000_BUILTIN_INFQ:
16376 case RS6000_BUILTIN_HUGE_VALQ:
16378 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16379 REAL_VALUE_TYPE inf;
16380 real_inf (&inf);
16381 return build_real (type, inf);
16383 default:
16384 break;
16387 #ifdef SUBTARGET_FOLD_BUILTIN
16388 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16389 #else
16390 return NULL_TREE;
16391 #endif
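/* Illustrative effect of the folder above: with -mfloat128 enabled,

     __float128 a = __builtin_nanq ("");   // folds to a quiet-NaN REAL_CST
     __float128 b = __builtin_infq ();     // folds to a +Inf REAL_CST

   both initializers become compile-time constants, so no call is ever
   expanded for them.  */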
16394 /* Expand an expression EXP that calls a built-in function,
16395 with result going to TARGET if that's convenient
16396 (and in mode MODE if that's convenient).
16397 SUBTARGET may be used as the target for computing one of EXP's operands.
16398 IGNORE is nonzero if the value is to be ignored. */
16400 static rtx
16401 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16402 machine_mode mode ATTRIBUTE_UNUSED,
16403 int ignore ATTRIBUTE_UNUSED)
16405 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16406 enum rs6000_builtins fcode
16407 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16408 size_t uns_fcode = (size_t)fcode;
16409 const struct builtin_description *d;
16410 size_t i;
16411 rtx ret;
16412 bool success;
16413 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16414 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16416 if (TARGET_DEBUG_BUILTIN)
16418 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16419 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16420 const char *name2 = ((icode != CODE_FOR_nothing)
16421 ? get_insn_name ((int)icode)
16422 : "nothing");
16423 const char *name3;
16425 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16427 default: name3 = "unknown"; break;
16428 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16429 case RS6000_BTC_UNARY: name3 = "unary"; break;
16430 case RS6000_BTC_BINARY: name3 = "binary"; break;
16431 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16432 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16433 case RS6000_BTC_ABS: name3 = "abs"; break;
16434 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16435 case RS6000_BTC_DST: name3 = "dst"; break;
16439 fprintf (stderr,
16440 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16441 (name1) ? name1 : "---", fcode,
16442 (name2) ? name2 : "---", (int)icode,
16443 name3,
16444 func_valid_p ? "" : ", not valid");
16447 if (!func_valid_p)
16449 rs6000_invalid_builtin (fcode);
16451 /* Given it is invalid, just generate a normal call. */
16452 return expand_call (exp, target, ignore);
16455 switch (fcode)
16457 case RS6000_BUILTIN_RECIP:
16458 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16460 case RS6000_BUILTIN_RECIPF:
16461 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16463 case RS6000_BUILTIN_RSQRTF:
16464 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16466 case RS6000_BUILTIN_RSQRT:
16467 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16469 case POWER7_BUILTIN_BPERMD:
16470 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16471 ? CODE_FOR_bpermd_di
16472 : CODE_FOR_bpermd_si), exp, target);
16474 case RS6000_BUILTIN_GET_TB:
16475 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16476 target);
16478 case RS6000_BUILTIN_MFTB:
16479 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16480 ? CODE_FOR_rs6000_mftb_di
16481 : CODE_FOR_rs6000_mftb_si),
16482 target);
16484 case RS6000_BUILTIN_MFFS:
16485 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16487 case RS6000_BUILTIN_MTFSF:
16488 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16490 case RS6000_BUILTIN_CPU_INIT:
16491 case RS6000_BUILTIN_CPU_IS:
16492 case RS6000_BUILTIN_CPU_SUPPORTS:
16493 return cpu_expand_builtin (fcode, exp, target);
16495 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16496 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16498 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16499 : (int) CODE_FOR_altivec_lvsl_direct);
16500 machine_mode tmode = insn_data[icode].operand[0].mode;
16501 machine_mode mode = insn_data[icode].operand[1].mode;
16502 tree arg;
16503 rtx op, addr, pat;
16505 gcc_assert (TARGET_ALTIVEC);
16507 arg = CALL_EXPR_ARG (exp, 0);
16508 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16509 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16510 addr = memory_address (mode, op);
16511 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16512 op = addr;
16513 else
16515 /* For the load case we need to negate the address. */
16516 op = gen_reg_rtx (GET_MODE (addr));
16517 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16519 op = gen_rtx_MEM (mode, op);
16521 if (target == 0
16522 || GET_MODE (target) != tmode
16523 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16524 target = gen_reg_rtx (tmode);
16526 pat = GEN_FCN (icode) (target, op);
16527 if (!pat)
16528 return 0;
16529 emit_insn (pat);
16531 return target;
16534 case ALTIVEC_BUILTIN_VCFUX:
16535 case ALTIVEC_BUILTIN_VCFSX:
16536 case ALTIVEC_BUILTIN_VCTUXS:
16537 case ALTIVEC_BUILTIN_VCTSXS:
16538 /* FIXME: There's got to be a nicer way to handle this case than
16539 constructing a new CALL_EXPR. */
16540 if (call_expr_nargs (exp) == 1)
16542 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16543 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16545 break;
16547 default:
16548 break;
16551 if (TARGET_ALTIVEC)
16553 ret = altivec_expand_builtin (exp, target, &success);
16555 if (success)
16556 return ret;
16558 if (TARGET_SPE)
16560 ret = spe_expand_builtin (exp, target, &success);
16562 if (success)
16563 return ret;
16565 if (TARGET_PAIRED_FLOAT)
16567 ret = paired_expand_builtin (exp, target, &success);
16569 if (success)
16570 return ret;
16572 if (TARGET_HTM)
16574 ret = htm_expand_builtin (exp, target, &success);
16576 if (success)
16577 return ret;
16580 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16581 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16582 gcc_assert (attr == RS6000_BTC_UNARY
16583 || attr == RS6000_BTC_BINARY
16584 || attr == RS6000_BTC_TERNARY
16585 || attr == RS6000_BTC_SPECIAL);
16587 /* Handle simple unary operations. */
16588 d = bdesc_1arg;
16589 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16590 if (d->code == fcode)
16591 return rs6000_expand_unop_builtin (d->icode, exp, target);
16593 /* Handle simple binary operations. */
16594 d = bdesc_2arg;
16595 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16596 if (d->code == fcode)
16597 return rs6000_expand_binop_builtin (d->icode, exp, target);
16599 /* Handle simple ternary operations. */
16600 d = bdesc_3arg;
16601 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16602 if (d->code == fcode)
16603 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16605 /* Handle simple no-argument operations. */
16606 d = bdesc_0arg;
16607 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16608 if (d->code == fcode)
16609 return rs6000_expand_zeroop_builtin (d->icode, target);
16611 gcc_unreachable ();
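/* Example of the trace printed above under -mdebug=builtin (names and
   numbers illustrative):

     rs6000_expand_builtin, __builtin_altivec_vadduwm (123), insn = addv4si3 (456), type=binary

   A builtin whose mask is not satisfied is reported with ", not valid"
   and expanded as an ordinary call instead.  */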
16614 static void
16615 rs6000_init_builtins (void)
16617 tree tdecl;
16618 tree ftype;
16619 machine_mode mode;
16621 if (TARGET_DEBUG_BUILTIN)
16622 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16623 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16624 (TARGET_SPE) ? ", spe" : "",
16625 (TARGET_ALTIVEC) ? ", altivec" : "",
16626 (TARGET_VSX) ? ", vsx" : "");
16628 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16629 V2SF_type_node = build_vector_type (float_type_node, 2);
16630 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16631 V2DF_type_node = build_vector_type (double_type_node, 2);
16632 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16633 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16634 V4SF_type_node = build_vector_type (float_type_node, 4);
16635 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16636 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16638 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16639 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16640 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16641 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16643 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16644 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16645 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16646 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16648 const_str_type_node
16649 = build_pointer_type (build_qualified_type (char_type_node,
16650 TYPE_QUAL_CONST));
16652 /* We use V1TI mode as a special container to hold __int128_t items that
16653 must live in VSX registers. */
16654 if (intTI_type_node)
16656 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16657 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
16660 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16661 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16662 'vector unsigned short'. */
16664 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16665 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16666 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16667 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16668 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16670 long_integer_type_internal_node = long_integer_type_node;
16671 long_unsigned_type_internal_node = long_unsigned_type_node;
16672 long_long_integer_type_internal_node = long_long_integer_type_node;
16673 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16674 intQI_type_internal_node = intQI_type_node;
16675 uintQI_type_internal_node = unsigned_intQI_type_node;
16676 intHI_type_internal_node = intHI_type_node;
16677 uintHI_type_internal_node = unsigned_intHI_type_node;
16678 intSI_type_internal_node = intSI_type_node;
16679 uintSI_type_internal_node = unsigned_intSI_type_node;
16680 intDI_type_internal_node = intDI_type_node;
16681 uintDI_type_internal_node = unsigned_intDI_type_node;
16682 intTI_type_internal_node = intTI_type_node;
16683 uintTI_type_internal_node = unsigned_intTI_type_node;
16684 float_type_internal_node = float_type_node;
16685 double_type_internal_node = double_type_node;
16686 long_double_type_internal_node = long_double_type_node;
16687 dfloat64_type_internal_node = dfloat64_type_node;
16688 dfloat128_type_internal_node = dfloat128_type_node;
16689 void_type_internal_node = void_type_node;
16691 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16692 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16693 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16694 format that uses a pair of doubles, depending on the switches and
16695 defaults.
16697 We do not enable the actual __float128 keyword unless the user explicitly
16698 asks for it, because the library support is not yet complete.
16700 If we don't support either 128-bit IBM double-double or IEEE 128-bit
16701 floating point, we need to make sure the type is non-zero, or else the
16702 self-test fails during bootstrap.
16704 We don't register a built-in type for __ibm128 if the type is the same as
16705 long double. Instead we add a #define for __ibm128 in
16706 rs6000_cpu_cpp_builtins to long double. */
16707 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16709 ibm128_float_type_node = make_node (REAL_TYPE);
16710 TYPE_PRECISION (ibm128_float_type_node) = 128;
16711 layout_type (ibm128_float_type_node);
16712 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16714 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16715 "__ibm128");
16717 else
16718 ibm128_float_type_node = long_double_type_node;
16720 if (TARGET_FLOAT128_KEYWORD)
16722 ieee128_float_type_node = float128_type_node;
16723 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16724 "__float128");
16727 else if (TARGET_FLOAT128_TYPE)
16729 ieee128_float_type_node = make_node (REAL_TYPE);
16730 TYPE_PRECISION (ieee128_float_type_node) = 128;
16731 layout_type (ieee128_float_type_node);
16732 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16734 /* If we are not exporting the __float128/_Float128 keywords, we need a
16735 keyword to get the types created. Use __ieee128 as the dummy
16736 keyword. */
16737 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16738 "__ieee128");
16741 else
16742 ieee128_float_type_node = long_double_type_node;
16744 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16745 its tree type node. */
16746 builtin_mode_to_type[QImode][0] = integer_type_node;
16747 builtin_mode_to_type[HImode][0] = integer_type_node;
16748 builtin_mode_to_type[SImode][0] = intSI_type_node;
16749 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16750 builtin_mode_to_type[DImode][0] = intDI_type_node;
16751 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16752 builtin_mode_to_type[TImode][0] = intTI_type_node;
16753 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16754 builtin_mode_to_type[SFmode][0] = float_type_node;
16755 builtin_mode_to_type[DFmode][0] = double_type_node;
16756 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16757 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16758 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16759 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16760 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16761 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16762 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16763 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16764 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16765 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16766 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16767 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16768 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16769 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16770 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16771 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16772 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16773 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16774 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16775 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16777 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16778 TYPE_NAME (bool_char_type_node) = tdecl;
16780 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16781 TYPE_NAME (bool_short_type_node) = tdecl;
16783 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16784 TYPE_NAME (bool_int_type_node) = tdecl;
16786 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16787 TYPE_NAME (pixel_type_node) = tdecl;
16789 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16790 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16791 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16792 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16793 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16795 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16796 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16798 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16799 TYPE_NAME (V16QI_type_node) = tdecl;
16801 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
17102 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16804 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16805 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16807 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16808 TYPE_NAME (V8HI_type_node) = tdecl;
16810 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16811 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16813 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16814 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16816 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16817 TYPE_NAME (V4SI_type_node) = tdecl;
16819 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16820 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16822 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16823 TYPE_NAME (V4SF_type_node) = tdecl;
16825 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16826 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16828 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16829 TYPE_NAME (V2DF_type_node) = tdecl;
16831 if (TARGET_POWERPC64)
16833 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16834 TYPE_NAME (V2DI_type_node) = tdecl;
16836 tdecl = add_builtin_type ("__vector unsigned long",
16837 unsigned_V2DI_type_node);
16838 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16840 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16841 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16843 else
16845 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16846 TYPE_NAME (V2DI_type_node) = tdecl;
16848 tdecl = add_builtin_type ("__vector unsigned long long",
16849 unsigned_V2DI_type_node);
16850 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16852 tdecl = add_builtin_type ("__vector __bool long long",
16853 bool_V2DI_type_node);
16854 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16857 if (V1TI_type_node)
16859 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16860 TYPE_NAME (V1TI_type_node) = tdecl;
16862 tdecl = add_builtin_type ("__vector unsigned __int128",
16863 unsigned_V1TI_type_node);
16864 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16867 /* Paired and SPE builtins are only available in compilers built with the
16868 corresponding support, so create those builtins only when the matching
16869 compiler option is enabled. Create AltiVec and VSX builtins on machines
16870 with at least the general purpose extensions (970 and newer) to allow the
16871 use of the target attribute. */
16872 if (TARGET_PAIRED_FLOAT)
16873 paired_init_builtins ();
16874 if (TARGET_SPE)
16875 spe_init_builtins ();
16876 if (TARGET_EXTRA_BUILTINS)
16877 altivec_init_builtins ();
16878 if (TARGET_HTM)
16879 htm_init_builtins ();
16881 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16882 rs6000_common_init_builtins ();
16884 ftype = build_function_type_list (ieee128_float_type_node,
16885 const_str_type_node, NULL_TREE);
16886 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16887 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16889 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16890 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16891 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16893 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16894 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16895 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16897 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16898 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16899 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16901 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16902 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16903 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16905 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16906 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16907 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16909 mode = (TARGET_64BIT) ? DImode : SImode;
16910 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16911 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16912 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16914 ftype = build_function_type_list (unsigned_intDI_type_node,
16915 NULL_TREE);
16916 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16918 if (TARGET_64BIT)
16919 ftype = build_function_type_list (unsigned_intDI_type_node,
16920 NULL_TREE);
16921 else
16922 ftype = build_function_type_list (unsigned_intSI_type_node,
16923 NULL_TREE);
16924 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16926 ftype = build_function_type_list (double_type_node, NULL_TREE);
16927 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16929 ftype = build_function_type_list (void_type_node,
16930 intSI_type_node, double_type_node,
16931 NULL_TREE);
16932 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16934 ftype = build_function_type_list (void_type_node, NULL_TREE);
16935 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16937 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16938 NULL_TREE);
16939 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16940 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16942 #if TARGET_XCOFF
16943 /* AIX libm provides clog as __clog. */
16944 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16945 set_user_assembler_name (tdecl, "__clog");
16946 #endif
16948 #ifdef SUBTARGET_INIT_BUILTINS
16949 SUBTARGET_INIT_BUILTINS;
16950 #endif
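/* Illustrative use of a few of the builtins registered above (the
   argument strings are examples):

     unsigned long long t = __builtin_ppc_get_timebase ();
     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("vsx"))
       ...;

   __builtin_ppc_mftb returns the full timebase on -m64 and only the low
   word on -m32, matching the two function types built above.  */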
16953 /* Returns the rs6000 builtin decl for CODE. */
16955 static tree
16956 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16958 HOST_WIDE_INT fnmask;
16960 if (code >= RS6000_BUILTIN_COUNT)
16961 return error_mark_node;
16963 fnmask = rs6000_builtin_info[code].mask;
16964 if ((fnmask & rs6000_builtin_mask) != fnmask)
16966 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16967 return error_mark_node;
16970 return rs6000_builtin_decls[code];
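/* Sketch of the caller's side (illustrative): front ends reach this
   function through the target hook, e.g.

     tree decl = targetm.builtin_decl (code, true);

   and receive error_mark_node either for an out-of-range CODE or for a
   builtin whose mask is not enabled by the current options.  */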
16973 static void
16974 spe_init_builtins (void)
16976 tree puint_type_node = build_pointer_type (unsigned_type_node);
16977 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16978 const struct builtin_description *d;
16979 size_t i;
16980 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16982 tree v2si_ftype_4_v2si
16983 = build_function_type_list (opaque_V2SI_type_node,
16984 opaque_V2SI_type_node,
16985 opaque_V2SI_type_node,
16986 opaque_V2SI_type_node,
16987 opaque_V2SI_type_node,
16988 NULL_TREE);
16990 tree v2sf_ftype_4_v2sf
16991 = build_function_type_list (opaque_V2SF_type_node,
16992 opaque_V2SF_type_node,
16993 opaque_V2SF_type_node,
16994 opaque_V2SF_type_node,
16995 opaque_V2SF_type_node,
16996 NULL_TREE);
16998 tree int_ftype_int_v2si_v2si
16999 = build_function_type_list (integer_type_node,
17000 integer_type_node,
17001 opaque_V2SI_type_node,
17002 opaque_V2SI_type_node,
17003 NULL_TREE);
17005 tree int_ftype_int_v2sf_v2sf
17006 = build_function_type_list (integer_type_node,
17007 integer_type_node,
17008 opaque_V2SF_type_node,
17009 opaque_V2SF_type_node,
17010 NULL_TREE);
17012 tree void_ftype_v2si_puint_int
17013 = build_function_type_list (void_type_node,
17014 opaque_V2SI_type_node,
17015 puint_type_node,
17016 integer_type_node,
17017 NULL_TREE);
17019 tree void_ftype_v2si_puint_char
17020 = build_function_type_list (void_type_node,
17021 opaque_V2SI_type_node,
17022 puint_type_node,
17023 char_type_node,
17024 NULL_TREE);
17026 tree void_ftype_v2si_pv2si_int
17027 = build_function_type_list (void_type_node,
17028 opaque_V2SI_type_node,
17029 opaque_p_V2SI_type_node,
17030 integer_type_node,
17031 NULL_TREE);
17033 tree void_ftype_v2si_pv2si_char
17034 = build_function_type_list (void_type_node,
17035 opaque_V2SI_type_node,
17036 opaque_p_V2SI_type_node,
17037 char_type_node,
17038 NULL_TREE);
17040 tree void_ftype_int
17041 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17043 tree int_ftype_void
17044 = build_function_type_list (integer_type_node, NULL_TREE);
17046 tree v2si_ftype_pv2si_int
17047 = build_function_type_list (opaque_V2SI_type_node,
17048 opaque_p_V2SI_type_node,
17049 integer_type_node,
17050 NULL_TREE);
17052 tree v2si_ftype_puint_int
17053 = build_function_type_list (opaque_V2SI_type_node,
17054 puint_type_node,
17055 integer_type_node,
17056 NULL_TREE);
17058 tree v2si_ftype_pushort_int
17059 = build_function_type_list (opaque_V2SI_type_node,
17060 pushort_type_node,
17061 integer_type_node,
17062 NULL_TREE);
17064 tree v2si_ftype_signed_char
17065 = build_function_type_list (opaque_V2SI_type_node,
17066 signed_char_type_node,
17067 NULL_TREE);
17069 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17071 /* Initialize irregular SPE builtins. */
17073 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17074 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17075 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17076 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17077 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17078 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17079 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17080 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17081 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17082 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17083 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
17084 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
17085 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
17086 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
17087 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
17088 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
17089 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
17090 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
17092 /* Loads. */
17093 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
17094 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
17095 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
17096 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
17097 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
17098 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
17099 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
17100 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
17101 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
17102 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
17103 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
17104 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
17105 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
17106 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
17107 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
17108 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
17109 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
17110 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
17111 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
17112 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
17113 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
17114 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
17116 /* Predicates. */
17117 d = bdesc_spe_predicates;
17118 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
17120 tree type;
17121 HOST_WIDE_INT mask = d->mask;
17123 if ((mask & builtin_mask) != mask)
17125 if (TARGET_DEBUG_BUILTIN)
17126 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
17127 d->name);
17128 continue;
17131 switch (insn_data[d->icode].operand[1].mode)
17133 case V2SImode:
17134 type = int_ftype_int_v2si_v2si;
17135 break;
17136 case V2SFmode:
17137 type = int_ftype_int_v2sf_v2sf;
17138 break;
17139 default:
17140 gcc_unreachable ();
17143 def_builtin (d->name, type, d->code);
17146 /* Evsel predicates. */
17147 d = bdesc_spe_evsel;
17148 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
17150 tree type;
17151 HOST_WIDE_INT mask = d->mask;
17153 if ((mask & builtin_mask) != mask)
17155 if (TARGET_DEBUG_BUILTIN)
17156 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
17157 d->name);
17158 continue;
17161 switch (insn_data[d->icode].operand[1].mode)
17163 case V2SImode:
17164 type = v2si_ftype_4_v2si;
17165 break;
17166 case V2SFmode:
17167 type = v2sf_ftype_4_v2sf;
17168 break;
17169 default:
17170 gcc_unreachable ();
17173 def_builtin (d->name, type, d->code);
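/* Illustrative use of two of the SPE builtins registered above, with
   the types exactly as built here:

     __ev64_opaque__ *p, v;
     v = __builtin_spe_evldd (p, 0);     // v2si_ftype_pv2si_int
     __builtin_spe_evstdd (v, p, 0);     // void_ftype_v2si_pv2si_char

   The trailing argument is a small literal offset checked when the
   builtin is expanded.  */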
17177 static void
17178 paired_init_builtins (void)
17180 const struct builtin_description *d;
17181 size_t i;
17182 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17184 tree int_ftype_int_v2sf_v2sf
17185 = build_function_type_list (integer_type_node,
17186 integer_type_node,
17187 V2SF_type_node,
17188 V2SF_type_node,
17189 NULL_TREE);
17190 tree pcfloat_type_node =
17191 build_pointer_type (build_qualified_type
17192 (float_type_node, TYPE_QUAL_CONST));
17194 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17195 long_integer_type_node,
17196 pcfloat_type_node,
17197 NULL_TREE);
17198 tree void_ftype_v2sf_long_pcfloat =
17199 build_function_type_list (void_type_node,
17200 V2SF_type_node,
17201 long_integer_type_node,
17202 pcfloat_type_node,
17203 NULL_TREE);
17206 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17207 PAIRED_BUILTIN_LX);
17210 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17211 PAIRED_BUILTIN_STX);
17213 /* Predicates. */
17214 d = bdesc_paired_preds;
17215 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17217 tree type;
17218 HOST_WIDE_INT mask = d->mask;
17220 if ((mask & builtin_mask) != mask)
17222 if (TARGET_DEBUG_BUILTIN)
17223 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17224 d->name);
17225 continue;
17228 if (TARGET_DEBUG_BUILTIN)
17229 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17230 (int)i, get_insn_name (d->icode), (int)d->icode,
17231 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17233 switch (insn_data[d->icode].operand[1].mode)
17235 case V2SFmode:
17236 type = int_ftype_int_v2sf_v2sf;
17237 break;
17238 default:
17239 gcc_unreachable ();
17242 def_builtin (d->name, type, d->code);
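/* Illustrative round trip through the two paired builtins defined
   above: __builtin_paired_lx takes a long offset and a const float *
   and yields a V2SF value; __builtin_paired_stx stores one back:

     float src[2], dst[2];
     __builtin_paired_stx (__builtin_paired_lx (0, src), 0, dst);
*/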
17246 static void
17247 altivec_init_builtins (void)
17249 const struct builtin_description *d;
17250 size_t i;
17251 tree ftype;
17252 tree decl;
17253 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17255 tree pvoid_type_node = build_pointer_type (void_type_node);
17257 tree pcvoid_type_node
17258 = build_pointer_type (build_qualified_type (void_type_node,
17259 TYPE_QUAL_CONST));
17261 tree int_ftype_opaque
17262 = build_function_type_list (integer_type_node,
17263 opaque_V4SI_type_node, NULL_TREE);
17264 tree opaque_ftype_opaque
17265 = build_function_type_list (integer_type_node, NULL_TREE);
17266 tree opaque_ftype_opaque_int
17267 = build_function_type_list (opaque_V4SI_type_node,
17268 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17269 tree opaque_ftype_opaque_opaque_int
17270 = build_function_type_list (opaque_V4SI_type_node,
17271 opaque_V4SI_type_node, opaque_V4SI_type_node,
17272 integer_type_node, NULL_TREE);
17273 tree opaque_ftype_opaque_opaque_opaque
17274 = build_function_type_list (opaque_V4SI_type_node,
17275 opaque_V4SI_type_node, opaque_V4SI_type_node,
17276 opaque_V4SI_type_node, NULL_TREE);
17277 tree opaque_ftype_opaque_opaque
17278 = build_function_type_list (opaque_V4SI_type_node,
17279 opaque_V4SI_type_node, opaque_V4SI_type_node,
17280 NULL_TREE);
17281 tree int_ftype_int_opaque_opaque
17282 = build_function_type_list (integer_type_node,
17283 integer_type_node, opaque_V4SI_type_node,
17284 opaque_V4SI_type_node, NULL_TREE);
17285 tree int_ftype_int_v4si_v4si
17286 = build_function_type_list (integer_type_node,
17287 integer_type_node, V4SI_type_node,
17288 V4SI_type_node, NULL_TREE);
17289 tree int_ftype_int_v2di_v2di
17290 = build_function_type_list (integer_type_node,
17291 integer_type_node, V2DI_type_node,
17292 V2DI_type_node, NULL_TREE);
17293 tree void_ftype_v4si
17294 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17295 tree v8hi_ftype_void
17296 = build_function_type_list (V8HI_type_node, NULL_TREE);
17297 tree void_ftype_void
17298 = build_function_type_list (void_type_node, NULL_TREE);
17299 tree void_ftype_int
17300 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17302 tree opaque_ftype_long_pcvoid
17303 = build_function_type_list (opaque_V4SI_type_node,
17304 long_integer_type_node, pcvoid_type_node,
17305 NULL_TREE);
17306 tree v16qi_ftype_long_pcvoid
17307 = build_function_type_list (V16QI_type_node,
17308 long_integer_type_node, pcvoid_type_node,
17309 NULL_TREE);
17310 tree v8hi_ftype_long_pcvoid
17311 = build_function_type_list (V8HI_type_node,
17312 long_integer_type_node, pcvoid_type_node,
17313 NULL_TREE);
17314 tree v4si_ftype_long_pcvoid
17315 = build_function_type_list (V4SI_type_node,
17316 long_integer_type_node, pcvoid_type_node,
17317 NULL_TREE);
17318 tree v4sf_ftype_long_pcvoid
17319 = build_function_type_list (V4SF_type_node,
17320 long_integer_type_node, pcvoid_type_node,
17321 NULL_TREE);
17322 tree v2df_ftype_long_pcvoid
17323 = build_function_type_list (V2DF_type_node,
17324 long_integer_type_node, pcvoid_type_node,
17325 NULL_TREE);
17326 tree v2di_ftype_long_pcvoid
17327 = build_function_type_list (V2DI_type_node,
17328 long_integer_type_node, pcvoid_type_node,
17329 NULL_TREE);
17331 tree void_ftype_opaque_long_pvoid
17332 = build_function_type_list (void_type_node,
17333 opaque_V4SI_type_node, long_integer_type_node,
17334 pvoid_type_node, NULL_TREE);
17335 tree void_ftype_v4si_long_pvoid
17336 = build_function_type_list (void_type_node,
17337 V4SI_type_node, long_integer_type_node,
17338 pvoid_type_node, NULL_TREE);
17339 tree void_ftype_v16qi_long_pvoid
17340 = build_function_type_list (void_type_node,
17341 V16QI_type_node, long_integer_type_node,
17342 pvoid_type_node, NULL_TREE);
17344 tree void_ftype_v16qi_pvoid_long
17345 = build_function_type_list (void_type_node,
17346 V16QI_type_node, pvoid_type_node,
17347 long_integer_type_node, NULL_TREE);
17349 tree void_ftype_v8hi_long_pvoid
17350 = build_function_type_list (void_type_node,
17351 V8HI_type_node, long_integer_type_node,
17352 pvoid_type_node, NULL_TREE);
17353 tree void_ftype_v4sf_long_pvoid
17354 = build_function_type_list (void_type_node,
17355 V4SF_type_node, long_integer_type_node,
17356 pvoid_type_node, NULL_TREE);
17357 tree void_ftype_v2df_long_pvoid
17358 = build_function_type_list (void_type_node,
17359 V2DF_type_node, long_integer_type_node,
17360 pvoid_type_node, NULL_TREE);
17361 tree void_ftype_v2di_long_pvoid
17362 = build_function_type_list (void_type_node,
17363 V2DI_type_node, long_integer_type_node,
17364 pvoid_type_node, NULL_TREE);
17365 tree int_ftype_int_v8hi_v8hi
17366 = build_function_type_list (integer_type_node,
17367 integer_type_node, V8HI_type_node,
17368 V8HI_type_node, NULL_TREE);
17369 tree int_ftype_int_v16qi_v16qi
17370 = build_function_type_list (integer_type_node,
17371 integer_type_node, V16QI_type_node,
17372 V16QI_type_node, NULL_TREE);
17373 tree int_ftype_int_v4sf_v4sf
17374 = build_function_type_list (integer_type_node,
17375 integer_type_node, V4SF_type_node,
17376 V4SF_type_node, NULL_TREE);
17377 tree int_ftype_int_v2df_v2df
17378 = build_function_type_list (integer_type_node,
17379 integer_type_node, V2DF_type_node,
17380 V2DF_type_node, NULL_TREE);
17381 tree v2di_ftype_v2di
17382 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17383 tree v4si_ftype_v4si
17384 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17385 tree v8hi_ftype_v8hi
17386 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17387 tree v16qi_ftype_v16qi
17388 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17389 tree v4sf_ftype_v4sf
17390 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17391 tree v2df_ftype_v2df
17392 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17393 tree void_ftype_pcvoid_int_int
17394 = build_function_type_list (void_type_node,
17395 pcvoid_type_node, integer_type_node,
17396 integer_type_node, NULL_TREE);
17398 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17399 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17400 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17401 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17402 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17403 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17404 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17405 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17406 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17407 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17408 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17409 ALTIVEC_BUILTIN_LVXL_V2DF);
17410 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17411 ALTIVEC_BUILTIN_LVXL_V2DI);
17412 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17413 ALTIVEC_BUILTIN_LVXL_V4SF);
17414 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17415 ALTIVEC_BUILTIN_LVXL_V4SI);
17416 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17417 ALTIVEC_BUILTIN_LVXL_V8HI);
17418 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17419 ALTIVEC_BUILTIN_LVXL_V16QI);
17420 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17421 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17422 ALTIVEC_BUILTIN_LVX_V2DF);
17423 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17424 ALTIVEC_BUILTIN_LVX_V2DI);
17425 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17426 ALTIVEC_BUILTIN_LVX_V4SF);
17427 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17428 ALTIVEC_BUILTIN_LVX_V4SI);
17429 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17430 ALTIVEC_BUILTIN_LVX_V8HI);
17431 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17432 ALTIVEC_BUILTIN_LVX_V16QI);
17433 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17434 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17435 ALTIVEC_BUILTIN_STVX_V2DF);
17436 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17437 ALTIVEC_BUILTIN_STVX_V2DI);
17438 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17439 ALTIVEC_BUILTIN_STVX_V4SF);
17440 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17441 ALTIVEC_BUILTIN_STVX_V4SI);
17442 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17443 ALTIVEC_BUILTIN_STVX_V8HI);
17444 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17445 ALTIVEC_BUILTIN_STVX_V16QI);
17446 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17447 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17448 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17449 ALTIVEC_BUILTIN_STVXL_V2DF);
17450 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17451 ALTIVEC_BUILTIN_STVXL_V2DI);
17452 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17453 ALTIVEC_BUILTIN_STVXL_V4SF);
17454 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17455 ALTIVEC_BUILTIN_STVXL_V4SI);
17456 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17457 ALTIVEC_BUILTIN_STVXL_V8HI);
17458 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17459 ALTIVEC_BUILTIN_STVXL_V16QI);
17460 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17461 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17462 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17463 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17464 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17465 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17466 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17467 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17468 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17469 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17470 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17471 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17472 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17473 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17474 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17475 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17477 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17478 VSX_BUILTIN_LXVD2X_V2DF);
17479 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17480 VSX_BUILTIN_LXVD2X_V2DI);
17481 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17482 VSX_BUILTIN_LXVW4X_V4SF);
17483 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17484 VSX_BUILTIN_LXVW4X_V4SI);
17485 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17486 VSX_BUILTIN_LXVW4X_V8HI);
17487 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17488 VSX_BUILTIN_LXVW4X_V16QI);
17489 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17490 VSX_BUILTIN_STXVD2X_V2DF);
17491 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17492 VSX_BUILTIN_STXVD2X_V2DI);
17493 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17494 VSX_BUILTIN_STXVW4X_V4SF);
17495 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17496 VSX_BUILTIN_STXVW4X_V4SI);
17497 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17498 VSX_BUILTIN_STXVW4X_V8HI);
17499 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17500 VSX_BUILTIN_STXVW4X_V16QI);
17502 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17503 VSX_BUILTIN_LD_ELEMREV_V2DF);
17504 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17505 VSX_BUILTIN_LD_ELEMREV_V2DI);
17506 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17507 VSX_BUILTIN_LD_ELEMREV_V4SF);
17508 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17509 VSX_BUILTIN_LD_ELEMREV_V4SI);
17510 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17511 VSX_BUILTIN_ST_ELEMREV_V2DF);
17512 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17513 VSX_BUILTIN_ST_ELEMREV_V2DI);
17514 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17515 VSX_BUILTIN_ST_ELEMREV_V4SF);
17516 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17517 VSX_BUILTIN_ST_ELEMREV_V4SI);
17519 if (TARGET_P9_VECTOR)
17521 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17522 VSX_BUILTIN_LD_ELEMREV_V8HI);
17523 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17524 VSX_BUILTIN_LD_ELEMREV_V16QI);
17525 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17526 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17527 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17528 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17531 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17532 VSX_BUILTIN_VEC_LD);
17533 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17534 VSX_BUILTIN_VEC_ST);
17535 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17536 VSX_BUILTIN_VEC_XL);
17537 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17538 VSX_BUILTIN_VEC_XST);
17540 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17541 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17542 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17544 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17545 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17546 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17547 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17548 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17549 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17550 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17551 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17552 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17553 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17554 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17555 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17557 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17558 ALTIVEC_BUILTIN_VEC_ADDE);
17559 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17560 ALTIVEC_BUILTIN_VEC_ADDEC);
17561 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17562 ALTIVEC_BUILTIN_VEC_CMPNE);
17563 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17564 ALTIVEC_BUILTIN_VEC_MUL);
17566 /* Cell builtins. */
17567 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17568 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17569 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17570 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17572 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17573 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17574 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17575 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17577 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17578 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17579 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17580 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17582 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17583 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17584 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17585 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17587 if (TARGET_P9_VECTOR)
17588 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17589 P9V_BUILTIN_STXVL);
17591 /* Add the DST variants. */
17592 d = bdesc_dst;
17593 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17595 HOST_WIDE_INT mask = d->mask;
17597 if ((mask & builtin_mask) != mask)
17599 if (TARGET_DEBUG_BUILTIN)
17600 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17601 d->name);
17602 continue;
17604 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17607 /* Initialize the predicates. */
17608 d = bdesc_altivec_preds;
17609 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17611 machine_mode mode1;
17612 tree type;
17613 HOST_WIDE_INT mask = d->mask;
17615 if ((mask & builtin_mask) != mask)
17617 if (TARGET_DEBUG_BUILTIN)
17618 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17619 d->name);
17620 continue;
17623 if (rs6000_overloaded_builtin_p (d->code))
17624 mode1 = VOIDmode;
17625 else
17626 mode1 = insn_data[d->icode].operand[1].mode;
17628 switch (mode1)
17630 case VOIDmode:
17631 type = int_ftype_int_opaque_opaque;
17632 break;
17633 case V2DImode:
17634 type = int_ftype_int_v2di_v2di;
17635 break;
17636 case V4SImode:
17637 type = int_ftype_int_v4si_v4si;
17638 break;
17639 case V8HImode:
17640 type = int_ftype_int_v8hi_v8hi;
17641 break;
17642 case V16QImode:
17643 type = int_ftype_int_v16qi_v16qi;
17644 break;
17645 case V4SFmode:
17646 type = int_ftype_int_v4sf_v4sf;
17647 break;
17648 case V2DFmode:
17649 type = int_ftype_int_v2df_v2df;
17650 break;
17651 default:
17652 gcc_unreachable ();
17655 def_builtin (d->name, type, d->code);
17658 /* Initialize the abs* operators. */
17659 d = bdesc_abs;
17660 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17662 machine_mode mode0;
17663 tree type;
17664 HOST_WIDE_INT mask = d->mask;
17666 if ((mask & builtin_mask) != mask)
17668 if (TARGET_DEBUG_BUILTIN)
17669 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17670 d->name);
17671 continue;
17674 mode0 = insn_data[d->icode].operand[0].mode;
17676 switch (mode0)
17678 case V2DImode:
17679 type = v2di_ftype_v2di;
17680 break;
17681 case V4SImode:
17682 type = v4si_ftype_v4si;
17683 break;
17684 case V8HImode:
17685 type = v8hi_ftype_v8hi;
17686 break;
17687 case V16QImode:
17688 type = v16qi_ftype_v16qi;
17689 break;
17690 case V4SFmode:
17691 type = v4sf_ftype_v4sf;
17692 break;
17693 case V2DFmode:
17694 type = v2df_ftype_v2df;
17695 break;
17696 default:
17697 gcc_unreachable ();
17700 def_builtin (d->name, type, d->code);
17703 /* Initialize target builtin that implements
17704 targetm.vectorize.builtin_mask_for_load. */
17706 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17707 v16qi_ftype_long_pcvoid,
17708 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17709 BUILT_IN_MD, NULL, NULL_TREE);
17710 TREE_READONLY (decl) = 1;
17711 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17712 altivec_builtin_mask_for_load = decl;
17714 /* Access to the vec_init patterns. */
17715 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17716 integer_type_node, integer_type_node,
17717 integer_type_node, NULL_TREE);
17718 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17720 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17721 short_integer_type_node,
17722 short_integer_type_node,
17723 short_integer_type_node,
17724 short_integer_type_node,
17725 short_integer_type_node,
17726 short_integer_type_node,
17727 short_integer_type_node, NULL_TREE);
17728 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17730 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17731 char_type_node, char_type_node,
17732 char_type_node, char_type_node,
17733 char_type_node, char_type_node,
17734 char_type_node, char_type_node,
17735 char_type_node, char_type_node,
17736 char_type_node, char_type_node,
17737 char_type_node, char_type_node,
17738 char_type_node, NULL_TREE);
17739 def_builtin ("__builtin_vec_init_v16qi", ftype,
17740 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17742 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17743 float_type_node, float_type_node,
17744 float_type_node, NULL_TREE);
17745 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17747 /* VSX builtins. */
17748 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17749 double_type_node, NULL_TREE);
17750 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17752 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17753 intDI_type_node, NULL_TREE);
17754 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17756 /* Access to the vec_set patterns. */
17757 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17758 intSI_type_node,
17759 integer_type_node, NULL_TREE);
17760 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17762 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17763 intHI_type_node,
17764 integer_type_node, NULL_TREE);
17765 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17767 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17768 intQI_type_node,
17769 integer_type_node, NULL_TREE);
17770 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17772 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17773 float_type_node,
17774 integer_type_node, NULL_TREE);
17775 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17777 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17778 double_type_node,
17779 integer_type_node, NULL_TREE);
17780 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17782 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17783 intDI_type_node,
17784 integer_type_node, NULL_TREE);
17785 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17787 /* Access to the vec_extract patterns. */
17788 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17789 integer_type_node, NULL_TREE);
17790 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17792 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17793 integer_type_node, NULL_TREE);
17794 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17796 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17797 integer_type_node, NULL_TREE);
17798 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17800 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17801 integer_type_node, NULL_TREE);
17802 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17804 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17805 integer_type_node, NULL_TREE);
17806 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17808 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17809 integer_type_node, NULL_TREE);
17810 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17813 if (V1TI_type_node)
17815 tree v1ti_ftype_long_pcvoid
17816 = build_function_type_list (V1TI_type_node,
17817 long_integer_type_node, pcvoid_type_node,
17818 NULL_TREE);
17819 tree void_ftype_v1ti_long_pvoid
17820 = build_function_type_list (void_type_node,
17821 V1TI_type_node, long_integer_type_node,
17822 pvoid_type_node, NULL_TREE);
17823 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17824 VSX_BUILTIN_LXVD2X_V1TI);
17825 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17826 VSX_BUILTIN_STXVD2X_V1TI);
17827 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17828 NULL_TREE);
17829 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17830 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17831 intTI_type_node,
17832 integer_type_node, NULL_TREE);
17833 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17834 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17835 integer_type_node, NULL_TREE);
17836 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17841 static void
17842 htm_init_builtins (void)
17844 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17845 const struct builtin_description *d;
17846 size_t i;
17848 d = bdesc_htm;
17849 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17851 tree op[MAX_HTM_OPERANDS], type;
17852 HOST_WIDE_INT mask = d->mask;
17853 unsigned attr = rs6000_builtin_info[d->code].attr;
17854 bool void_func = (attr & RS6000_BTC_VOID);
17855 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17856 int nopnds = 0;
17857 tree gpr_type_node;
17858 tree rettype;
17859 tree argtype;
17861 if (TARGET_32BIT && TARGET_POWERPC64)
17862 gpr_type_node = long_long_unsigned_type_node;
17863 else
17864 gpr_type_node = long_unsigned_type_node;
17866 if (attr & RS6000_BTC_SPR)
17868 rettype = gpr_type_node;
17869 argtype = gpr_type_node;
17871 else if (d->code == HTM_BUILTIN_TABORTDC
17872 || d->code == HTM_BUILTIN_TABORTDCI)
17874 rettype = unsigned_type_node;
17875 argtype = gpr_type_node;
17877 else
17879 rettype = unsigned_type_node;
17880 argtype = unsigned_type_node;
17883 if ((mask & builtin_mask) != mask)
17885 if (TARGET_DEBUG_BUILTIN)
17886 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
17887 continue;
17890 if (d->name == 0)
17892 if (TARGET_DEBUG_BUILTIN)
17893 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17894 (long unsigned) i);
17895 continue;
17898 op[nopnds++] = (void_func) ? void_type_node : rettype;
17900 if (attr_args == RS6000_BTC_UNARY)
17901 op[nopnds++] = argtype;
17902 else if (attr_args == RS6000_BTC_BINARY)
17904 op[nopnds++] = argtype;
17905 op[nopnds++] = argtype;
17907 else if (attr_args == RS6000_BTC_TERNARY)
17909 op[nopnds++] = argtype;
17910 op[nopnds++] = argtype;
17911 op[nopnds++] = argtype;
17914 switch (nopnds)
17916 case 1:
17917 type = build_function_type_list (op[0], NULL_TREE);
17918 break;
17919 case 2:
17920 type = build_function_type_list (op[0], op[1], NULL_TREE);
17921 break;
17922 case 3:
17923 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17924 break;
17925 case 4:
17926 type = build_function_type_list (op[0], op[1], op[2], op[3],
17927 NULL_TREE);
17928 break;
17929 default:
17930 gcc_unreachable ();
17933 def_builtin (d->name, type, d->code);
17937 /* Hash function for builtin functions with up to 3 arguments and a return
17938 type. */
17939 hashval_t
17940 builtin_hasher::hash (builtin_hash_struct *bh)
17942 unsigned ret = 0;
17943 int i;
17945 for (i = 0; i < 4; i++)
17947 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17948 ret = (ret * 2) + bh->uns_p[i];
17951 return ret;
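/* For illustration (a sketch, not part of the build): the loop above is a
   mixed-radix encoding, ret = ((ret * MAX_MACHINE_MODE) + mode) * 2 + uns_p,
   applied once per slot.  With hypothetical small values, say
   MAX_MACHINE_MODE == 10 and (mode, uns_p) slots (3,1) (5,0) (0,0) (0,0),
   it computes 0 -> 3 -> 7 -> 75 -> 150 -> 1500 -> 3000 -> 30000 -> 60000,
   so distinct mode/sign combinations hash apart unless the arithmetic
   wraps hashval_t.  */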
17954 /* Compare builtin hash entries H1 and H2 for equivalence. */
17955 bool
17956 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17958 return ((p1->mode[0] == p2->mode[0])
17959 && (p1->mode[1] == p2->mode[1])
17960 && (p1->mode[2] == p2->mode[2])
17961 && (p1->mode[3] == p2->mode[3])
17962 && (p1->uns_p[0] == p2->uns_p[0])
17963 && (p1->uns_p[1] == p2->uns_p[1])
17964 && (p1->uns_p[2] == p2->uns_p[2])
17965 && (p1->uns_p[3] == p2->uns_p[3]));
17968 /* Map types for builtin functions with an explicit return type and up to 3
17969 arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
17970 of each unused argument.  */
17971 static tree
17972 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17973 machine_mode mode_arg1, machine_mode mode_arg2,
17974 enum rs6000_builtins builtin, const char *name)
17976 struct builtin_hash_struct h;
17977 struct builtin_hash_struct *h2;
17978 int num_args = 3;
17979 int i;
17980 tree ret_type = NULL_TREE;
17981 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17983 /* Create builtin_hash_table. */
17984 if (builtin_hash_table == NULL)
17985 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17987 h.type = NULL_TREE;
17988 h.mode[0] = mode_ret;
17989 h.mode[1] = mode_arg0;
17990 h.mode[2] = mode_arg1;
17991 h.mode[3] = mode_arg2;
17992 h.uns_p[0] = 0;
17993 h.uns_p[1] = 0;
17994 h.uns_p[2] = 0;
17995 h.uns_p[3] = 0;
17997 /* If the builtin produces unsigned results or takes unsigned arguments,
17998 and it is returned as a decl for the vectorizer (such as the widening
17999 multiplies and permutes), make sure the argument and return types
18000 are correct.  */
18001 switch (builtin)
18003 /* unsigned 1 argument functions. */
18004 case CRYPTO_BUILTIN_VSBOX:
18005 case P8V_BUILTIN_VGBBD:
18006 case MISC_BUILTIN_CDTBCD:
18007 case MISC_BUILTIN_CBCDTD:
18008 h.uns_p[0] = 1;
18009 h.uns_p[1] = 1;
18010 break;
18012 /* unsigned 2 argument functions. */
18013 case ALTIVEC_BUILTIN_VMULEUB_UNS:
18014 case ALTIVEC_BUILTIN_VMULEUH_UNS:
18015 case ALTIVEC_BUILTIN_VMULOUB_UNS:
18016 case ALTIVEC_BUILTIN_VMULOUH_UNS:
18017 case CRYPTO_BUILTIN_VCIPHER:
18018 case CRYPTO_BUILTIN_VCIPHERLAST:
18019 case CRYPTO_BUILTIN_VNCIPHER:
18020 case CRYPTO_BUILTIN_VNCIPHERLAST:
18021 case CRYPTO_BUILTIN_VPMSUMB:
18022 case CRYPTO_BUILTIN_VPMSUMH:
18023 case CRYPTO_BUILTIN_VPMSUMW:
18024 case CRYPTO_BUILTIN_VPMSUMD:
18025 case CRYPTO_BUILTIN_VPMSUM:
18026 case MISC_BUILTIN_ADDG6S:
18027 case MISC_BUILTIN_DIVWEU:
18028 case MISC_BUILTIN_DIVWEUO:
18029 case MISC_BUILTIN_DIVDEU:
18030 case MISC_BUILTIN_DIVDEUO:
18031 h.uns_p[0] = 1;
18032 h.uns_p[1] = 1;
18033 h.uns_p[2] = 1;
18034 break;
18036 /* unsigned 3 argument functions. */
18037 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18038 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18039 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18040 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18041 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18042 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18043 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18044 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18045 case VSX_BUILTIN_VPERM_16QI_UNS:
18046 case VSX_BUILTIN_VPERM_8HI_UNS:
18047 case VSX_BUILTIN_VPERM_4SI_UNS:
18048 case VSX_BUILTIN_VPERM_2DI_UNS:
18049 case VSX_BUILTIN_XXSEL_16QI_UNS:
18050 case VSX_BUILTIN_XXSEL_8HI_UNS:
18051 case VSX_BUILTIN_XXSEL_4SI_UNS:
18052 case VSX_BUILTIN_XXSEL_2DI_UNS:
18053 case CRYPTO_BUILTIN_VPERMXOR:
18054 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18055 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18056 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18057 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18058 case CRYPTO_BUILTIN_VSHASIGMAW:
18059 case CRYPTO_BUILTIN_VSHASIGMAD:
18060 case CRYPTO_BUILTIN_VSHASIGMA:
18061 h.uns_p[0] = 1;
18062 h.uns_p[1] = 1;
18063 h.uns_p[2] = 1;
18064 h.uns_p[3] = 1;
18065 break;
18067 /* signed permute functions with unsigned char mask. */
18068 case ALTIVEC_BUILTIN_VPERM_16QI:
18069 case ALTIVEC_BUILTIN_VPERM_8HI:
18070 case ALTIVEC_BUILTIN_VPERM_4SI:
18071 case ALTIVEC_BUILTIN_VPERM_4SF:
18072 case ALTIVEC_BUILTIN_VPERM_2DI:
18073 case ALTIVEC_BUILTIN_VPERM_2DF:
18074 case VSX_BUILTIN_VPERM_16QI:
18075 case VSX_BUILTIN_VPERM_8HI:
18076 case VSX_BUILTIN_VPERM_4SI:
18077 case VSX_BUILTIN_VPERM_4SF:
18078 case VSX_BUILTIN_VPERM_2DI:
18079 case VSX_BUILTIN_VPERM_2DF:
18080 h.uns_p[3] = 1;
18081 break;
18083 /* unsigned args, signed return. */
18084 case VSX_BUILTIN_XVCVUXDDP_UNS:
18085 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18086 h.uns_p[1] = 1;
18087 break;
18089 /* signed args, unsigned return. */
18090 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18091 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18092 case MISC_BUILTIN_UNPACK_TD:
18093 case MISC_BUILTIN_UNPACK_V1TI:
18094 h.uns_p[0] = 1;
18095 break;
18097 /* unsigned arguments for 128-bit pack instructions. */
18098 case MISC_BUILTIN_PACK_TD:
18099 case MISC_BUILTIN_PACK_V1TI:
18100 h.uns_p[1] = 1;
18101 h.uns_p[2] = 1;
18102 break;
18104 default:
18105 break;
18108 /* Figure out how many args are present. */
18109 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18110 num_args--;
18112 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18113 if (!ret_type && h.uns_p[0])
18114 ret_type = builtin_mode_to_type[h.mode[0]][0];
18116 if (!ret_type)
18117 fatal_error (input_location,
18118 "internal error: builtin function %s had an unexpected "
18119 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18121 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18122 arg_type[i] = NULL_TREE;
18124 for (i = 0; i < num_args; i++)
18126 int m = (int) h.mode[i+1];
18127 int uns_p = h.uns_p[i+1];
18129 arg_type[i] = builtin_mode_to_type[m][uns_p];
18130 if (!arg_type[i] && uns_p)
18131 arg_type[i] = builtin_mode_to_type[m][0];
18133 if (!arg_type[i])
18134 fatal_error (input_location,
18135 "internal error: builtin function %s, argument %d "
18136 "had unexpected argument type %s", name, i,
18137 GET_MODE_NAME (m));
18140 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18141 if (*found == NULL)
18143 h2 = ggc_alloc<builtin_hash_struct> ();
18144 *h2 = h;
18145 *found = h2;
18147 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18148 arg_type[2], NULL_TREE);
18151 return (*found)->type;
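/* For illustration (a hedged sketch; the builtin code and name below are
   only examples): a call such as
     builtin_function_type (V4SImode, V4SImode, V4SImode, VOIDmode,
			    ALTIVEC_BUILTIN_VADDUWM, "__builtin_altivec_vadduwm")
   trims num_args from 3 to 2 because the trailing mode is VOIDmode, maps
   each remaining mode through builtin_mode_to_type, and then caches and
   returns the equivalent of
     build_function_type_list (V4SI_type_node, V4SI_type_node,
			       V4SI_type_node, NULL_TREE);
   i.e. "v4si f (v4si, v4si)".  */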
18154 static void
18155 rs6000_common_init_builtins (void)
18157 const struct builtin_description *d;
18158 size_t i;
18160 tree opaque_ftype_opaque = NULL_TREE;
18161 tree opaque_ftype_opaque_opaque = NULL_TREE;
18162 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18163 tree v2si_ftype = NULL_TREE;
18164 tree v2si_ftype_qi = NULL_TREE;
18165 tree v2si_ftype_v2si_qi = NULL_TREE;
18166 tree v2si_ftype_int_qi = NULL_TREE;
18167 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18169 if (!TARGET_PAIRED_FLOAT)
18171 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18172 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18175 /* Paired and SPE builtins are only available if you build a compiler with
18176 the appropriate options, so only create them when those options are
18177 present.  Create Altivec and VSX builtins on machines with at least the
18178 general-purpose extensions (970 and newer) to allow the use of the
18179 target attribute.  */
18181 if (TARGET_EXTRA_BUILTINS)
18182 builtin_mask |= RS6000_BTM_COMMON;
18184 /* Add the ternary operators. */
18185 d = bdesc_3arg;
18186 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18188 tree type;
18189 HOST_WIDE_INT mask = d->mask;
18191 if ((mask & builtin_mask) != mask)
18193 if (TARGET_DEBUG_BUILTIN)
18194 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18195 continue;
18198 if (rs6000_overloaded_builtin_p (d->code))
18200 if (! (type = opaque_ftype_opaque_opaque_opaque))
18201 type = opaque_ftype_opaque_opaque_opaque
18202 = build_function_type_list (opaque_V4SI_type_node,
18203 opaque_V4SI_type_node,
18204 opaque_V4SI_type_node,
18205 opaque_V4SI_type_node,
18206 NULL_TREE);
18208 else
18210 enum insn_code icode = d->icode;
18211 if (d->name == 0)
18213 if (TARGET_DEBUG_BUILTIN)
18214 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18215 (long unsigned)i);
18217 continue;
18220 if (icode == CODE_FOR_nothing)
18222 if (TARGET_DEBUG_BUILTIN)
18223 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18224 d->name);
18226 continue;
18229 type = builtin_function_type (insn_data[icode].operand[0].mode,
18230 insn_data[icode].operand[1].mode,
18231 insn_data[icode].operand[2].mode,
18232 insn_data[icode].operand[3].mode,
18233 d->code, d->name);
18236 def_builtin (d->name, type, d->code);
18239 /* Add the binary operators. */
18240 d = bdesc_2arg;
18241 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18243 machine_mode mode0, mode1, mode2;
18244 tree type;
18245 HOST_WIDE_INT mask = d->mask;
18247 if ((mask & builtin_mask) != mask)
18249 if (TARGET_DEBUG_BUILTIN)
18250 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18251 continue;
18254 if (rs6000_overloaded_builtin_p (d->code))
18256 if (! (type = opaque_ftype_opaque_opaque))
18257 type = opaque_ftype_opaque_opaque
18258 = build_function_type_list (opaque_V4SI_type_node,
18259 opaque_V4SI_type_node,
18260 opaque_V4SI_type_node,
18261 NULL_TREE);
18263 else
18265 enum insn_code icode = d->icode;
18266 if (d->name == 0)
18268 if (TARGET_DEBUG_BUILTIN)
18269 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18270 (long unsigned)i);
18272 continue;
18275 if (icode == CODE_FOR_nothing)
18277 if (TARGET_DEBUG_BUILTIN)
18278 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18279 d->name);
18281 continue;
18284 mode0 = insn_data[icode].operand[0].mode;
18285 mode1 = insn_data[icode].operand[1].mode;
18286 mode2 = insn_data[icode].operand[2].mode;
18288 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18290 if (! (type = v2si_ftype_v2si_qi))
18291 type = v2si_ftype_v2si_qi
18292 = build_function_type_list (opaque_V2SI_type_node,
18293 opaque_V2SI_type_node,
18294 char_type_node,
18295 NULL_TREE);
18298 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18299 && mode2 == QImode)
18301 if (! (type = v2si_ftype_int_qi))
18302 type = v2si_ftype_int_qi
18303 = build_function_type_list (opaque_V2SI_type_node,
18304 integer_type_node,
18305 char_type_node,
18306 NULL_TREE);
18309 else
18310 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18311 d->code, d->name);
18314 def_builtin (d->name, type, d->code);
18317 /* Add the simple unary operators. */
18318 d = bdesc_1arg;
18319 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18321 machine_mode mode0, mode1;
18322 tree type;
18323 HOST_WIDE_INT mask = d->mask;
18325 if ((mask & builtin_mask) != mask)
18327 if (TARGET_DEBUG_BUILTIN)
18328 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18329 continue;
18332 if (rs6000_overloaded_builtin_p (d->code))
18334 if (! (type = opaque_ftype_opaque))
18335 type = opaque_ftype_opaque
18336 = build_function_type_list (opaque_V4SI_type_node,
18337 opaque_V4SI_type_node,
18338 NULL_TREE);
18340 else
18342 enum insn_code icode = d->icode;
18343 if (d->name == 0)
18345 if (TARGET_DEBUG_BUILTIN)
18346 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18347 (long unsigned)i);
18349 continue;
18352 if (icode == CODE_FOR_nothing)
18354 if (TARGET_DEBUG_BUILTIN)
18355 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18356 d->name);
18358 continue;
18361 mode0 = insn_data[icode].operand[0].mode;
18362 mode1 = insn_data[icode].operand[1].mode;
18364 if (mode0 == V2SImode && mode1 == QImode)
18366 if (! (type = v2si_ftype_qi))
18367 type = v2si_ftype_qi
18368 = build_function_type_list (opaque_V2SI_type_node,
18369 char_type_node,
18370 NULL_TREE);
18373 else
18374 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18375 d->code, d->name);
18378 def_builtin (d->name, type, d->code);
18381 /* Add the simple no-argument operators. */
18382 d = bdesc_0arg;
18383 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18385 machine_mode mode0;
18386 tree type;
18387 HOST_WIDE_INT mask = d->mask;
18389 if ((mask & builtin_mask) != mask)
18391 if (TARGET_DEBUG_BUILTIN)
18392 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18393 continue;
18395 if (rs6000_overloaded_builtin_p (d->code))
18397 if (!opaque_ftype_opaque)
18398 opaque_ftype_opaque
18399 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18400 type = opaque_ftype_opaque;
18402 else
18404 enum insn_code icode = d->icode;
18405 if (d->name == 0)
18407 if (TARGET_DEBUG_BUILTIN)
18408 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18409 (long unsigned) i);
18410 continue;
18412 if (icode == CODE_FOR_nothing)
18414 if (TARGET_DEBUG_BUILTIN)
18415 fprintf (stderr,
18416 "rs6000_builtin, skip no-argument %s (no code)\n",
18417 d->name);
18418 continue;
18420 mode0 = insn_data[icode].operand[0].mode;
18421 if (mode0 == V2SImode)
18423 /* Code for SPE.  */
18424 if (! (type = v2si_ftype))
18426 v2si_ftype
18427 = build_function_type_list (opaque_V2SI_type_node,
18428 NULL_TREE);
18429 type = v2si_ftype;
18432 else
18433 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18434 d->code, d->name);
18436 def_builtin (d->name, type, d->code);
18440 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18441 static void
18442 init_float128_ibm (machine_mode mode)
18444 if (!TARGET_XL_COMPAT)
18446 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18447 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18448 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18449 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18451 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18453 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18454 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18455 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18456 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18457 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18458 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18459 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18461 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18462 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18463 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18464 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18465 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18466 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18467 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18468 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18471 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18472 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18474 else
18476 set_optab_libfunc (add_optab, mode, "_xlqadd");
18477 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18478 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18479 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18482 /* Add various conversions for IFmode to use the traditional TFmode
18483 names. */
18484 if (mode == IFmode)
18486 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18487 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18488 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18489 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18490 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18491 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18493 if (TARGET_POWERPC64)
18495 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18496 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18497 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18498 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18503 /* Set up IEEE 128-bit floating point routines. Use different names if the
18504 arguments can be passed in a vector register. The historical PowerPC
18505 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18506 continue to use that if we aren't using vector registers to pass IEEE
18507 128-bit floating point. */
18509 static void
18510 init_float128_ieee (machine_mode mode)
18512 if (FLOAT128_VECTOR_P (mode))
18514 set_optab_libfunc (add_optab, mode, "__addkf3");
18515 set_optab_libfunc (sub_optab, mode, "__subkf3");
18516 set_optab_libfunc (neg_optab, mode, "__negkf2");
18517 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18518 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18519 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18520 set_optab_libfunc (abs_optab, mode, "__abskf2");
18522 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18523 set_optab_libfunc (ne_optab, mode, "__nekf2");
18524 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18525 set_optab_libfunc (ge_optab, mode, "__gekf2");
18526 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18527 set_optab_libfunc (le_optab, mode, "__lekf2");
18528 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18530 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18531 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18532 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18533 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18535 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18536 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18537 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18539 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18540 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18541 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18543 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18544 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18545 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18546 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18547 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18548 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18550 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18551 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18552 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18553 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18555 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18556 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18557 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18558 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18560 if (TARGET_POWERPC64)
18562 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18563 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18564 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18565 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18569 else
18571 set_optab_libfunc (add_optab, mode, "_q_add");
18572 set_optab_libfunc (sub_optab, mode, "_q_sub");
18573 set_optab_libfunc (neg_optab, mode, "_q_neg");
18574 set_optab_libfunc (smul_optab, mode, "_q_mul");
18575 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18576 if (TARGET_PPC_GPOPT)
18577 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18579 set_optab_libfunc (eq_optab, mode, "_q_feq");
18580 set_optab_libfunc (ne_optab, mode, "_q_fne");
18581 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18582 set_optab_libfunc (ge_optab, mode, "_q_fge");
18583 set_optab_libfunc (lt_optab, mode, "_q_flt");
18584 set_optab_libfunc (le_optab, mode, "_q_fle");
18586 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18587 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18588 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18589 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18590 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18591 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18592 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18593 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18597 static void
18598 rs6000_init_libfuncs (void)
18600 /* __float128 support. */
18601 if (TARGET_FLOAT128_TYPE)
18603 init_float128_ibm (IFmode);
18604 init_float128_ieee (KFmode);
18607 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18608 if (TARGET_LONG_DOUBLE_128)
18610 if (!TARGET_IEEEQUAD)
18611 init_float128_ibm (TFmode);
18613 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18614 else
18615 init_float128_ieee (TFmode);
18620 /* Expand a block clear operation, and return 1 if successful. Return 0
18621 if we should let the compiler generate normal code.
18623 operands[0] is the destination
18624 operands[1] is the length
18625 operands[3] is the alignment */
18627 int
18628 expand_block_clear (rtx operands[])
18630 rtx orig_dest = operands[0];
18631 rtx bytes_rtx = operands[1];
18632 rtx align_rtx = operands[3];
18633 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18634 HOST_WIDE_INT align;
18635 HOST_WIDE_INT bytes;
18636 int offset;
18637 int clear_bytes;
18638 int clear_step;
18640 /* If this is not a fixed size clear, just call memset.  */
18641 if (! constp)
18642 return 0;
18644 /* This must be a fixed size alignment */
18645 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18646 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18648 /* Anything to clear? */
18649 bytes = INTVAL (bytes_rtx);
18650 if (bytes <= 0)
18651 return 1;
18653 /* Use the builtin memset beyond a certain size, to avoid huge code bloat.
18654 When optimizing for size, avoid any significant code bloat: calling
18655 memset costs about 4 instructions, so allow for one instruction to
18656 load zero and three to do the clearing.  */
18657 if (TARGET_ALTIVEC && align >= 128)
18658 clear_step = 16;
18659 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18660 clear_step = 8;
18661 else if (TARGET_SPE && align >= 64)
18662 clear_step = 8;
18663 else
18664 clear_step = 4;
18666 if (optimize_size && bytes > 3 * clear_step)
18667 return 0;
18668 if (! optimize_size && bytes > 8 * clear_step)
18669 return 0;
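/* Worked numbers for the checks above: with Altivec and 128-bit alignment,
   clear_step is 16, so the inline expansion handles at most 8*16 = 128
   bytes (3*16 = 48 bytes under optimize_size); larger clears fall back to
   the memset call.  */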
18671 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18673 machine_mode mode = BLKmode;
18674 rtx dest;
18676 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18678 clear_bytes = 16;
18679 mode = V4SImode;
18681 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18683 clear_bytes = 8;
18684 mode = V2SImode;
18686 else if (bytes >= 8 && TARGET_POWERPC64
18687 && (align >= 64 || !STRICT_ALIGNMENT))
18689 clear_bytes = 8;
18690 mode = DImode;
18691 if (offset == 0 && align < 64)
18693 rtx addr;
18695 /* If the address form is reg+offset with offset not a
18696 multiple of four, reload into reg indirect form here
18697 rather than waiting for reload. This way we get one
18698 reload, not one per store. */
18699 addr = XEXP (orig_dest, 0);
18700 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18701 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18702 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18704 addr = copy_addr_to_reg (addr);
18705 orig_dest = replace_equiv_address (orig_dest, addr);
18709 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18710 { /* clear 4 bytes */
18711 clear_bytes = 4;
18712 mode = SImode;
18714 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18715 { /* clear 2 bytes */
18716 clear_bytes = 2;
18717 mode = HImode;
18719 else /* clear 1 byte at a time */
18721 clear_bytes = 1;
18722 mode = QImode;
18725 dest = adjust_address (orig_dest, mode, offset);
18727 emit_move_insn (dest, CONST0_RTX (mode));
18730 return 1;
18733 /* Emit a potentially record-form instruction, setting DST from SRC.
18734 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18735 signed comparison of DST with zero. If DOT is 1, the generated RTL
18736 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18737 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18738 a separate COMPARE. */
18740 static void
18741 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18743 if (dot == 0)
18745 emit_move_insn (dst, src);
18746 return;
18749 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18751 emit_move_insn (dst, src);
18752 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18753 return;
18756 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18757 if (dot == 1)
18759 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18760 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18762 else
18764 rtx set = gen_rtx_SET (dst, src);
18765 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
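/* For illustration, the three shapes emitted above are roughly:
     dot == 0:  (set DST SRC)
     dot == 1:  (parallel [(set CCREG (compare SRC 0)) (clobber DST)])
     dot == 2:  (parallel [(set CCREG (compare SRC 0)) (set DST SRC)])
   with the separate SET followed by COMPARE used instead whenever CCREG
   is not CR0.  */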
18770 /* Figure out the correct instructions to generate to load data for
18771 block compare. MODE is used for the read from memory, and
18772 data is zero extended if REG is wider than MODE. If LE code
18773 is being generated, bswap loads are used.
18775 REG is the destination register to move the data into.
18776 MEM is the memory block being read.
18777 MODE is the mode of memory to use for the read. */
18778 static void
18779 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
18781 switch (GET_MODE (reg))
18783 case DImode:
18784 switch (mode)
18786 case QImode:
18787 emit_insn (gen_zero_extendqidi2 (reg, mem));
18788 break;
18789 case HImode:
18791 rtx src = mem;
18792 if (!BYTES_BIG_ENDIAN)
18794 src = gen_reg_rtx (HImode);
18795 emit_insn (gen_bswaphi2 (src, mem));
18797 emit_insn (gen_zero_extendhidi2 (reg, src));
18798 break;
18800 case SImode:
18802 rtx src = mem;
18803 if (!BYTES_BIG_ENDIAN)
18805 src = gen_reg_rtx (SImode);
18806 emit_insn (gen_bswapsi2 (src, mem));
18808 emit_insn (gen_zero_extendsidi2 (reg, src));
18810 break;
18811 case DImode:
18812 if (!BYTES_BIG_ENDIAN)
18813 emit_insn (gen_bswapdi2 (reg, mem));
18814 else
18815 emit_insn (gen_movdi (reg, mem));
18816 break;
18817 default:
18818 gcc_unreachable ();
18820 break;
18822 case SImode:
18823 switch (mode)
18825 case QImode:
18826 emit_insn (gen_zero_extendqisi2 (reg, mem));
18827 break;
18828 case HImode:
18830 rtx src = mem;
18831 if (!BYTES_BIG_ENDIAN)
18833 src = gen_reg_rtx (HImode);
18834 emit_insn (gen_bswaphi2 (src, mem));
18836 emit_insn (gen_zero_extendhisi2 (reg, src));
18837 break;
18839 case SImode:
18840 if (!BYTES_BIG_ENDIAN)
18841 emit_insn (gen_bswapsi2 (reg, mem));
18842 else
18843 emit_insn (gen_movsi (reg, mem));
18844 break;
18845 case DImode:
18846 /* DImode is larger than the destination reg, so it is not expected.  */
18847 gcc_unreachable ();
18848 break;
18849 default:
18850 gcc_unreachable ();
18852 break;
18853 default:
18854 gcc_unreachable ();
18855 break;
18859 /* Select the mode to be used for reading the next chunk of bytes
18860 in the compare.
18862 OFFSET is the current read offset from the beginning of the block.
18863 BYTES is the number of bytes remaining to be read.
18864 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
18865 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
18866 the largest allowable mode. */
18867 static machine_mode
18868 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
18869 HOST_WIDE_INT align, bool word_mode_ok)
18871 /* First see if we can do a whole load unit
18872 as that will be more efficient than a larger load + shift. */
18874 /* If big, use biggest chunk.
18875 If exactly chunk size, use that size.
18876 If remainder can be done in one piece with shifting, do that.
18877 Do largest chunk possible without violating alignment rules. */
18879 /* The most we can read without potential page crossing. */
18880 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
18882 if (word_mode_ok && bytes >= UNITS_PER_WORD)
18883 return word_mode;
18884 else if (bytes == GET_MODE_SIZE (SImode))
18885 return SImode;
18886 else if (bytes == GET_MODE_SIZE (HImode))
18887 return HImode;
18888 else if (bytes == GET_MODE_SIZE (QImode))
18889 return QImode;
18890 else if (bytes < GET_MODE_SIZE (SImode)
18891 && offset >= GET_MODE_SIZE (SImode) - bytes)
18892 /* This matches the case where we have SImode and 3 bytes
18893 and offset >= 1; it permits us to move back one byte and overlap
18894 with the previous read, thus avoiding having to shift
18895 unwanted bytes off of the input.  */
18896 return SImode;
18897 else if (word_mode_ok && bytes < UNITS_PER_WORD
18898 && offset >= UNITS_PER_WORD-bytes)
18899 /* Similarly, if we can use DImode it will get matched here and
18900 can do an overlapping read that ends at the end of the block. */
18901 return word_mode;
18902 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
18903 /* It is safe to do all remaining in one load of largest size,
18904 possibly with a shift to get rid of unwanted bytes. */
18905 return word_mode;
18906 else if (maxread >= GET_MODE_SIZE (SImode))
18907 /* It is safe to do all remaining in one SImode load,
18908 possibly with a shift to get rid of unwanted bytes. */
18909 return SImode;
18910 else if (bytes > GET_MODE_SIZE (SImode))
18911 return SImode;
18912 else if (bytes > GET_MODE_SIZE (HImode))
18913 return HImode;
18915 /* Final fallback is to do one byte at a time.  */
18916 return QImode;
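/* Worked example (assuming a 64-bit target with word_mode == DImode,
   UNITS_PER_WORD == 8, and word_mode_ok true): for an 11-byte block, the
   first call (offset 0, bytes 11) returns DImode since
   bytes >= UNITS_PER_WORD; the second call (offset 8, bytes 3) hits the
   overlapping-read case, because 3 < GET_MODE_SIZE (SImode) and
   offset >= 4 - 3, and returns SImode.  This matches the ldbrx/lwbrx
   sequence shown in expand_block_compare below.  */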
18919 /* Compute the alignment of pointer+OFFSET where the original alignment
18920 of pointer was BASE_ALIGN. */
18921 static HOST_WIDE_INT
18922 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
18924 if (offset == 0)
18925 return base_align;
18926 return min (base_align, offset & -offset);
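/* offset & -offset isolates the lowest set bit of OFFSET, i.e. the largest
   power of two dividing it.  For example, base_align 8 and offset 20:
   20 & -20 == 4, so pointer+20 is only guaranteed 4-byte alignment, and
   min (8, 4) == 4 is returned.  */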
18929 /* Expand a block compare operation, and return true if successful.
18930 Return false if we should let the compiler generate normal code,
18931 probably a memcmp call.
18933 OPERANDS[0] is the target (result).
18934 OPERANDS[1] is the first source.
18935 OPERANDS[2] is the second source.
18936 OPERANDS[3] is the length.
18937 OPERANDS[4] is the alignment. */
18938 bool
18939 expand_block_compare (rtx operands[])
18941 rtx target = operands[0];
18942 rtx orig_src1 = operands[1];
18943 rtx orig_src2 = operands[2];
18944 rtx bytes_rtx = operands[3];
18945 rtx align_rtx = operands[4];
18946 HOST_WIDE_INT cmp_bytes = 0;
18947 rtx src1 = orig_src1;
18948 rtx src2 = orig_src2;
18950 /* If this is not a fixed size compare, just call memcmp */
18951 if (!CONST_INT_P (bytes_rtx))
18952 return false;
18954 /* This must be a fixed size alignment */
18955 if (!CONST_INT_P (align_rtx))
18956 return false;
18958 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
18960 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */
18961 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
18962 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
18963 return false;
18965 gcc_assert (GET_MODE (target) == SImode);
18967 /* Anything to compare? */
18968 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
18969 if (bytes <= 0)
18970 return true;
18972 /* The code generated for p7 and older is not faster than glibc
18973 memcmp if alignment is small and length is not short, so bail
18974 out to avoid those conditions. */
18975 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
18976 && ((base_align == 1 && bytes > 16)
18977 || (base_align == 2 && bytes > 32)))
18978 return false;
18980 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
18981 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
18983 /* If we have an LE target without ldbrx and word_mode is DImode,
18984 then we must avoid using word_mode. */
18985 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
18986 && word_mode == DImode);
18988 /* Strategy phase. How many ops will this take and should we expand it? */
18990 int offset = 0;
18991 machine_mode load_mode =
18992 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
18993 int load_mode_size = GET_MODE_SIZE (load_mode);
18995 /* We don't want to generate too much code. */
18996 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
18997 > rs6000_block_compare_inline_limit)
18998 return false;
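/* For example, bytes == 33 with an 8-byte load_mode gives
   ROUND_UP (33, 8) / 8 == 5 loads from each source, so the expansion is
   rejected whenever rs6000_block_compare_inline_limit is below 5.  */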
19000 bool generate_6432_conversion = false;
19001 rtx convert_label = NULL;
19002 rtx final_label = NULL;
19004 /* Example of generated code for an 11-byte block with 1-byte alignment:
19005 .L10:
19006 ldbrx 10,6,9
19007 ldbrx 9,7,9
19008 subf. 9,9,10
19009 bne 0,.L8
19010 addi 9,4,7
19011 lwbrx 10,0,9
19012 addi 9,5,7
19013 lwbrx 9,0,9
19014 subf 9,9,10
19015 b .L9
19016 .L8: # convert_label
19017 cntlzd 9,9
19018 addi 9,9,-1
19019 xori 9,9,0x3f
19020 .L9: # final_label
19022 If we start off with DImode and have a compare/branch to something
19023 with a smaller mode, then we will need a block with the DI->SI conversion
19024 that may or may not be executed.  */
19026 while (bytes > 0)
19028 int align = compute_current_alignment (base_align, offset);
19029 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19030 load_mode = select_block_compare_mode (offset, bytes, align,
19031 word_mode_ok);
19032 else
19033 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
19034 load_mode_size = GET_MODE_SIZE (load_mode);
19035 if (bytes >= load_mode_size)
19036 cmp_bytes = load_mode_size;
19037 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
19039 /* Move this load back so it doesn't go past the end.
19040 P8/P9 can do this efficiently. */
19041 int extra_bytes = load_mode_size - bytes;
19042 cmp_bytes = bytes;
19043 if (extra_bytes < offset)
19045 offset -= extra_bytes;
19046 cmp_bytes = load_mode_size;
19047 bytes = cmp_bytes;
19050 else
19051 /* P7 and earlier can't do the overlapping load trick fast,
19052 so this forces a non-overlapping load and a shift to get
19053 rid of the extra bytes. */
19054 cmp_bytes = bytes;
19056 src1 = adjust_address (orig_src1, load_mode, offset);
19057 src2 = adjust_address (orig_src2, load_mode, offset);
19059 if (!REG_P (XEXP (src1, 0)))
19061 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
19062 src1 = replace_equiv_address (src1, src1_reg);
19064 set_mem_size (src1, cmp_bytes);
19066 if (!REG_P (XEXP (src2, 0)))
19068 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
19069 src2 = replace_equiv_address (src2, src2_reg);
19071 set_mem_size (src2, cmp_bytes);
19073 do_load_for_compare (tmp_reg_src1, src1, load_mode);
19074 do_load_for_compare (tmp_reg_src2, src2, load_mode);
19076 if (cmp_bytes < load_mode_size)
19078 /* Shift unneeded bytes off. */
19079 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
19080 if (word_mode == DImode)
19082 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
19083 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
19085 else
19087 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
19088 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
19092 /* We previously did a block that needed 64->32 conversion, but
19093 the current block does not, so a label is needed to jump
19094 to the end.  */
19095 if (generate_6432_conversion && !final_label
19096 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
19097 final_label = gen_label_rtx ();
19099 /* Do we need a 64->32 conversion block? */
19100 int remain = bytes - cmp_bytes;
19101 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
19103 generate_6432_conversion = true;
19104 if (remain > 0 && !convert_label)
19105 convert_label = gen_label_rtx ();
19108 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
19110 /* Target is larger than load size so we don't need to
19111 reduce result size. */
19112 if (remain > 0)
19114 /* This is not the last block, branch to the end if the result
19115 of this subtract is not zero. */
19116 if (!final_label)
19117 final_label = gen_label_rtx ();
19118 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19119 rtx cond = gen_reg_rtx (CCmode);
19120 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
19121 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19122 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
19123 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19124 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19125 fin_ref, pc_rtx);
19126 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19127 JUMP_LABEL (j) = final_label;
19128 LABEL_NUSES (final_label) += 1;
19130 else
19132 if (word_mode == DImode)
19134 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19135 tmp_reg_src2));
19136 emit_insn (gen_movsi (target,
19137 gen_lowpart (SImode, tmp_reg_src2)));
19139 else
19140 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
19142 if (final_label)
19144 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
19145 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
19146 JUMP_LABEL (j) = final_label;
19147 LABEL_NUSES (final_label) += 1;
19148 emit_barrier ();
19152 else
19154 generate_6432_conversion = true;
19155 if (remain > 0)
19157 if (!convert_label)
19158 convert_label = gen_label_rtx ();
19160 /* Compare to zero and branch to convert_label if not zero. */
19161 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
19162 rtx cond = gen_reg_rtx (CCmode);
19163 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
19164 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
19165 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19166 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
19167 cvt_ref, pc_rtx);
19168 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
19169 JUMP_LABEL (j) = convert_label;
19170 LABEL_NUSES (convert_label) += 1;
19172 else
19174 /* Just do the subtract. Since this is the last block the
19175 convert code will be generated immediately following. */
19176 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
19177 tmp_reg_src2));
19181 offset += cmp_bytes;
19182 bytes -= cmp_bytes;
19185 if (generate_6432_conversion)
19187 if (convert_label)
19188 emit_label (convert_label);
19190 /* We need to produce DI result from sub, then convert to target SI
19191 while maintaining <0 / ==0 / >0 properties.
19192 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
19193 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
19194 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
19195 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
19196 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
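/* Why that sequence preserves the sign of the 64-bit difference:
     diff == 0:  cntlzd gives 64; 64-1 == 63; 63^63 == 0
     diff <  0:  cntlzd gives 0;  0-1 == -1; -1^63 is still negative
     diff >  0:  cntlzd gives 1..63, so (clz-1)^63 lands in 1..63
   so the low 32 bits moved into TARGET keep the <0 / ==0 / >0 property
   of the DImode subtract result.  */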
19199 if (final_label)
19200 emit_label (final_label);
19202 gcc_assert (bytes == 0);
19203 return true;
19207 /* Expand a block move operation, and return 1 if successful. Return 0
19208 if we should let the compiler generate normal code.
19210 operands[0] is the destination
19211 operands[1] is the source
19212 operands[2] is the length
19213 operands[3] is the alignment */
19215 #define MAX_MOVE_REG 4
19217 int
19218 expand_block_move (rtx operands[])
19220 rtx orig_dest = operands[0];
19221 rtx orig_src = operands[1];
19222 rtx bytes_rtx = operands[2];
19223 rtx align_rtx = operands[3];
19224 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
19225 int align;
19226 int bytes;
19227 int offset;
19228 int move_bytes;
19229 rtx stores[MAX_MOVE_REG];
19230 int num_reg = 0;
19232 /* If this is not a fixed size move, just call memcpy */
19233 if (! constp)
19234 return 0;
19236 /* This must be a fixed size alignment */
19237 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19238 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19240 /* Anything to move? */
19241 bytes = INTVAL (bytes_rtx);
19242 if (bytes <= 0)
19243 return 1;
19245 if (bytes > rs6000_block_move_inline_limit)
19246 return 0;
19248 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
19250 union {
19251 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
19252 rtx (*mov) (rtx, rtx);
19253 } gen_func;
19254 machine_mode mode = BLKmode;
19255 rtx src, dest;
19257 /* Altivec first, since it will be faster than a string move
19258 when it applies, and usually not significantly larger. */
19259 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
19261 move_bytes = 16;
19262 mode = V4SImode;
19263 gen_func.mov = gen_movv4si;
19265 else if (TARGET_SPE && bytes >= 8 && align >= 64)
19267 move_bytes = 8;
19268 mode = V2SImode;
19269 gen_func.mov = gen_movv2si;
19271 else if (TARGET_STRING
19272 && bytes > 24 /* move up to 32 bytes at a time */
19273 && ! fixed_regs[5]
19274 && ! fixed_regs[6]
19275 && ! fixed_regs[7]
19276 && ! fixed_regs[8]
19277 && ! fixed_regs[9]
19278 && ! fixed_regs[10]
19279 && ! fixed_regs[11]
19280 && ! fixed_regs[12])
19282 move_bytes = (bytes > 32) ? 32 : bytes;
19283 gen_func.movmemsi = gen_movmemsi_8reg;
19285 else if (TARGET_STRING
19286 && bytes > 16 /* move up to 24 bytes at a time */
19287 && ! fixed_regs[5]
19288 && ! fixed_regs[6]
19289 && ! fixed_regs[7]
19290 && ! fixed_regs[8]
19291 && ! fixed_regs[9]
19292 && ! fixed_regs[10])
19294 move_bytes = (bytes > 24) ? 24 : bytes;
19295 gen_func.movmemsi = gen_movmemsi_6reg;
19297 else if (TARGET_STRING
19298 && bytes > 8 /* move up to 16 bytes at a time */
19299 && ! fixed_regs[5]
19300 && ! fixed_regs[6]
19301 && ! fixed_regs[7]
19302 && ! fixed_regs[8])
19304 move_bytes = (bytes > 16) ? 16 : bytes;
19305 gen_func.movmemsi = gen_movmemsi_4reg;
19307 else if (bytes >= 8 && TARGET_POWERPC64
19308 && (align >= 64 || !STRICT_ALIGNMENT))
19310 move_bytes = 8;
19311 mode = DImode;
19312 gen_func.mov = gen_movdi;
19313 if (offset == 0 && align < 64)
19315 rtx addr;
19317 /* If the address form is reg+offset with offset not a
19318 multiple of four, reload into reg indirect form here
19319 rather than waiting for reload. This way we get one
19320 reload, not one per load and/or store. */
19321 addr = XEXP (orig_dest, 0);
19322 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19323 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19324 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19326 addr = copy_addr_to_reg (addr);
19327 orig_dest = replace_equiv_address (orig_dest, addr);
19329 addr = XEXP (orig_src, 0);
19330 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19331 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19332 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19334 addr = copy_addr_to_reg (addr);
19335 orig_src = replace_equiv_address (orig_src, addr);
19339 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
19340 { /* move up to 8 bytes at a time */
19341 move_bytes = (bytes > 8) ? 8 : bytes;
19342 gen_func.movmemsi = gen_movmemsi_2reg;
19344 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19345 { /* move 4 bytes */
19346 move_bytes = 4;
19347 mode = SImode;
19348 gen_func.mov = gen_movsi;
19350 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19351 { /* move 2 bytes */
19352 move_bytes = 2;
19353 mode = HImode;
19354 gen_func.mov = gen_movhi;
19356 else if (TARGET_STRING && bytes > 1)
19357 { /* move up to 4 bytes at a time */
19358 move_bytes = (bytes > 4) ? 4 : bytes;
19359 gen_func.movmemsi = gen_movmemsi_1reg;
19361 else /* move 1 byte at a time */
19363 move_bytes = 1;
19364 mode = QImode;
19365 gen_func.mov = gen_movqi;
19368 src = adjust_address (orig_src, mode, offset);
19369 dest = adjust_address (orig_dest, mode, offset);
19371 if (mode != BLKmode)
19373 rtx tmp_reg = gen_reg_rtx (mode);
19375 emit_insn ((*gen_func.mov) (tmp_reg, src));
19376 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
19379 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
19381 int i;
19382 for (i = 0; i < num_reg; i++)
19383 emit_insn (stores[i]);
19384 num_reg = 0;
19387 if (mode == BLKmode)
19389 /* Move the address into scratch registers. The movmemsi
19390 patterns require zero offset. */
19391 if (!REG_P (XEXP (src, 0)))
19393 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
19394 src = replace_equiv_address (src, src_reg);
19396 set_mem_size (src, move_bytes);
19398 if (!REG_P (XEXP (dest, 0)))
19400 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
19401 dest = replace_equiv_address (dest, dest_reg);
19403 set_mem_size (dest, move_bytes);
19405 emit_insn ((*gen_func.movmemsi) (dest, src,
19406 GEN_INT (move_bytes & 31),
19407 align_rtx));
19411 return 1;
19415 /* Return a string to perform a load_multiple operation.
19416 operands[0] is the vector.
19417 operands[1] is the source address.
19418 operands[2] is the first destination register. */
19420 const char *
19421 rs6000_output_load_multiple (rtx operands[3])
19423 /* We have to handle the case where the pseudo holding the address
19424 is assigned to one of the output registers.  */
19425 int i, j;
19426 int words = XVECLEN (operands[0], 0);
19427 rtx xop[10];
19429 if (XVECLEN (operands[0], 0) == 1)
19430 return "lwz %2,0(%1)";
19432 for (i = 0; i < words; i++)
19433 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
19435 if (i == words-1)
19437 xop[0] = GEN_INT (4 * (words-1));
19438 xop[1] = operands[1];
19439 xop[2] = operands[2];
19440 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
19441 return "";
19443 else if (i == 0)
19445 xop[0] = GEN_INT (4 * (words-1));
19446 xop[1] = operands[1];
19447 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
19448 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
19449 return "";
19451 else
19453 for (j = 0; j < words; j++)
19454 if (j != i)
19456 xop[0] = GEN_INT (j * 4);
19457 xop[1] = operands[1];
19458 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
19459 output_asm_insn ("lwz %2,%0(%1)", xop);
19461 xop[0] = GEN_INT (i * 4);
19462 xop[1] = operands[1];
19463 output_asm_insn ("lwz %1,%0(%1)", xop);
19464 return "";
19468 return "lswi %2,%1,%N0";
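/* A sketch of the i == 0 case above (hypothetical registers): with a
   3-word vector, and the address and the first destination both r9, the
   template expands to
     addi 9,9,4 ; lswi 10,9,8 ; lwz 9,-4(9)
   i.e. the words for r10/r11 are loaded first and the overlapping
   register r9 is filled last, so the address is not clobbered early.  */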
19472 /* A validation routine: say whether CODE, a condition code, and MODE
19473 match. The other alternatives either don't make sense or should
19474 never be generated. */
19476 void
19477 validate_condition_mode (enum rtx_code code, machine_mode mode)
19479 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
19480 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
19481 && GET_MODE_CLASS (mode) == MODE_CC);
19483 /* These don't make sense. */
19484 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
19485 || mode != CCUNSmode);
19487 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
19488 || mode == CCUNSmode);
19490 gcc_assert (mode == CCFPmode
19491 || (code != ORDERED && code != UNORDERED
19492 && code != UNEQ && code != LTGT
19493 && code != UNGT && code != UNLT
19494 && code != UNGE && code != UNLE));
19496 /* These should never be generated except for
19497 flag_finite_math_only. */
19498 gcc_assert (mode != CCFPmode
19499 || flag_finite_math_only
19500 || (code != LE && code != GE
19501 && code != UNEQ && code != LTGT
19502 && code != UNGT && code != UNLT));
19504 /* These are invalid; the information is not there. */
19505 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
19509 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
19510 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
19511 non-null, store there the bit offset (counted from the right) where
19512 the single stretch of 1 bits begins; and similarly for B, the bit
19513 offset where it ends. */
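/* For example, in SImode 0x0000ff00 is a single stretch of ones
   covering bits 8..15, so we return true with *b = 15 and *e = 8; the
   wrap-around mask 0xff0000ff is also accepted, with *e = 24 and
   *b = 7. */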
19515 bool
19516 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
19518 unsigned HOST_WIDE_INT val = INTVAL (mask);
19519 unsigned HOST_WIDE_INT bit;
19520 int nb, ne;
19521 int n = GET_MODE_PRECISION (mode);
19523 if (mode != DImode && mode != SImode)
19524 return false;
19526 if (INTVAL (mask) >= 0)
19528 bit = val & -val;
19529 ne = exact_log2 (bit);
19530 nb = exact_log2 (val + bit);
19532 else if (val + 1 == 0)
19534 nb = n;
19535 ne = 0;
19537 else if (val & 1)
19539 val = ~val;
19540 bit = val & -val;
19541 nb = exact_log2 (bit);
19542 ne = exact_log2 (val + bit);
19544 else
19546 bit = val & -val;
19547 ne = exact_log2 (bit);
19548 if (val + bit == 0)
19549 nb = n;
19550 else
19551 nb = 0;
19554 nb--;
19556 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
19557 return false;
19559 if (b)
19560 *b = nb;
19561 if (e)
19562 *e = ne;
19564 return true;
19567 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
19568 or rldicr instruction, to implement an AND with it in mode MODE. */
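/* For example, in DImode an AND with 0x00000000ffffffff (ne = 0) maps
   to rldicl, with 0xffffffff00000000 (nb = 63) to rldicr, and with
   0x00000000ffff0000 (nb = 31, ne = 16, no wrap) to rlwinm. */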
19570 bool
19571 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
19573 int nb, ne;
19575 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19576 return false;
19578 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
19579 does not wrap. */
19580 if (mode == DImode)
19581 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
19583 /* For SImode, rlwinm can do everything. */
19584 if (mode == SImode)
19585 return (nb < 32 && ne < 32);
19587 return false;
19590 /* Return the instruction template for an AND with mask in mode MODE, with
19591 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19593 const char *
19594 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
19596 int nb, ne;
19598 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
19599 gcc_unreachable ();
19601 if (mode == DImode && ne == 0)
19603 operands[3] = GEN_INT (63 - nb);
19604 if (dot)
19605 return "rldicl. %0,%1,0,%3";
19606 return "rldicl %0,%1,0,%3";
19609 if (mode == DImode && nb == 63)
19611 operands[3] = GEN_INT (63 - ne);
19612 if (dot)
19613 return "rldicr. %0,%1,0,%3";
19614 return "rldicr %0,%1,0,%3";
19617 if (nb < 32 && ne < 32)
19619 operands[3] = GEN_INT (31 - nb);
19620 operands[4] = GEN_INT (31 - ne);
19621 if (dot)
19622 return "rlwinm. %0,%1,0,%3,%4";
19623 return "rlwinm %0,%1,0,%3,%4";
19626 gcc_unreachable ();
19629 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
19630 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
19631 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
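/* For example, (ashift:SI x 8) under the mask 0xffffff00 (nb = 31,
   ne = 8) is valid: it is a rotate left by 8 whose mask discards the
   wrapped-around bits, and rs6000_insn_for_shift_mask below renders
   it as "rlwinm %0,%1,8,0,23". */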
19633 bool
19634 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
19636 int nb, ne;
19638 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19639 return false;
19641 int n = GET_MODE_PRECISION (mode);
19642 int sh = -1;
19644 if (CONST_INT_P (XEXP (shift, 1)))
19646 sh = INTVAL (XEXP (shift, 1));
19647 if (sh < 0 || sh >= n)
19648 return false;
19651 rtx_code code = GET_CODE (shift);
19653 /* Convert any shift by 0 to a rotate, to simplify below code. */
19654 if (sh == 0)
19655 code = ROTATE;
19657 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19658 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19659 code = ASHIFT;
19660 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19662 code = LSHIFTRT;
19663 sh = n - sh;
19666 /* DImode rotates need rld*. */
19667 if (mode == DImode && code == ROTATE)
19668 return (nb == 63 || ne == 0 || ne == sh);
19670 /* SImode rotates need rlw*. */
19671 if (mode == SImode && code == ROTATE)
19672 return (nb < 32 && ne < 32 && sh < 32);
19674 /* Wrap-around masks are only okay for rotates. */
19675 if (ne > nb)
19676 return false;
19678 /* Variable shifts are only okay for rotates. */
19679 if (sh < 0)
19680 return false;
19682 /* Don't allow ASHIFT if the mask is wrong for that. */
19683 if (code == ASHIFT && ne < sh)
19684 return false;
19686 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
19687 if the mask is wrong for that. */
19688 if (nb < 32 && ne < 32 && sh < 32
19689 && !(code == LSHIFTRT && nb >= 32 - sh))
19690 return true;
19692 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
19693 if the mask is wrong for that. */
19694 if (code == LSHIFTRT)
19695 sh = 64 - sh;
19696 if (nb == 63 || ne == 0 || ne == sh)
19697 return !(code == LSHIFTRT && nb >= sh);
19699 return false;
19702 /* Return the instruction template for a shift with mask in mode MODE, with
19703 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19705 const char *
19706 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
19708 int nb, ne;
19710 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19711 gcc_unreachable ();
19713 if (mode == DImode && ne == 0)
19715 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19716 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19717 operands[3] = GEN_INT (63 - nb);
19718 if (dot)
19719 return "rld%I2cl. %0,%1,%2,%3";
19720 return "rld%I2cl %0,%1,%2,%3";
19723 if (mode == DImode && nb == 63)
19725 operands[3] = GEN_INT (63 - ne);
19726 if (dot)
19727 return "rld%I2cr. %0,%1,%2,%3";
19728 return "rld%I2cr %0,%1,%2,%3";
19731 if (mode == DImode
19732 && GET_CODE (operands[4]) != LSHIFTRT
19733 && CONST_INT_P (operands[2])
19734 && ne == INTVAL (operands[2]))
19736 operands[3] = GEN_INT (63 - nb);
19737 if (dot)
19738 return "rld%I2c. %0,%1,%2,%3";
19739 return "rld%I2c %0,%1,%2,%3";
19742 if (nb < 32 && ne < 32)
19744 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19745 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19746 operands[3] = GEN_INT (31 - nb);
19747 operands[4] = GEN_INT (31 - ne);
19748 /* This insn can also be a 64-bit rotate with mask that really makes
19749 it just a shift right (with mask); the %h below are to adjust for
19750 that situation (shift count is >= 32 in that case). */
19751 if (dot)
19752 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19753 return "rlw%I2nm %0,%1,%h2,%3,%4";
19756 gcc_unreachable ();
19759 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19760 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19761 ASHIFT, or LSHIFTRT) in mode MODE. */
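/* For example, inserting the low 16 bits of one register into the
   high 16 bits of another -- (ashift:SI x 16) under the mask
   0xffff0000 (nb = 31, ne = 16) -- is valid; the output routine below
   renders it as "rlwimi %0,%1,16,0,15", or prefers rldimi on 64-bit
   targets because rlwimi is cracked. */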
19763 bool
19764 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19766 int nb, ne;
19768 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19769 return false;
19771 int n = GET_MODE_PRECISION (mode);
19773 int sh = INTVAL (XEXP (shift, 1));
19774 if (sh < 0 || sh >= n)
19775 return false;
19777 rtx_code code = GET_CODE (shift);
19779 /* Convert any shift by 0 to a rotate, to simplify below code. */
19780 if (sh == 0)
19781 code = ROTATE;
19783 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19784 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19785 code = ASHIFT;
19786 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19788 code = LSHIFTRT;
19789 sh = n - sh;
19792 /* DImode rotates need rldimi. */
19793 if (mode == DImode && code == ROTATE)
19794 return (ne == sh);
19796 /* SImode rotates need rlwimi. */
19797 if (mode == SImode && code == ROTATE)
19798 return (nb < 32 && ne < 32 && sh < 32);
19800 /* Wrap-around masks are only okay for rotates. */
19801 if (ne > nb)
19802 return false;
19804 /* Don't allow ASHIFT if the mask is wrong for that. */
19805 if (code == ASHIFT && ne < sh)
19806 return false;
19808 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19809 if the mask is wrong for that. */
19810 if (nb < 32 && ne < 32 && sh < 32
19811 && !(code == LSHIFTRT && nb >= 32 - sh))
19812 return true;
19814 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19815 if the mask is wrong for that. */
19816 if (code == LSHIFTRT)
19817 sh = 64 - sh;
19818 if (ne == sh)
19819 return !(code == LSHIFTRT && nb >= sh);
19821 return false;
19824 /* Return the instruction template for an insert with mask in mode MODE, with
19825 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19827 const char *
19828 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19830 int nb, ne;
19832 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19833 gcc_unreachable ();
19835 /* Prefer rldimi because rlwimi is cracked. */
19836 if (TARGET_POWERPC64
19837 && (!dot || mode == DImode)
19838 && GET_CODE (operands[4]) != LSHIFTRT
19839 && ne == INTVAL (operands[2]))
19841 operands[3] = GEN_INT (63 - nb);
19842 if (dot)
19843 return "rldimi. %0,%1,%2,%3";
19844 return "rldimi %0,%1,%2,%3";
19847 if (nb < 32 && ne < 32)
19849 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19850 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19851 operands[3] = GEN_INT (31 - nb);
19852 operands[4] = GEN_INT (31 - ne);
19853 if (dot)
19854 return "rlwimi. %0,%1,%2,%3,%4";
19855 return "rlwimi %0,%1,%2,%3,%4";
19858 gcc_unreachable ();
19861 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19862 using two machine instructions. */
19864 bool
19865 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19867 /* There are two kinds of AND we can handle with two insns:
19868 1) those we can do with two rl* insns;
19869 2) ori[s];xori[s].
19871 We do not handle that last case yet. */
19873 /* If there is just one stretch of ones, we can do it. */
19874 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19875 return true;
19877 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19878 one insn, we can do the whole thing with two. */
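/* Worked example: val = 0xff00ff00 gives bit1 = 0x100 (lowest set
   bit), bit2 = 0x10000 (lowest bit of the lowest hole) and bit3 =
   0x1000000 (lowest set bit above that hole); val + bit3 - bit2 =
   0xffffff00 fills the hole and is a single stretch of ones, so this
   AND takes two insns. */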
19879 unsigned HOST_WIDE_INT val = INTVAL (c);
19880 unsigned HOST_WIDE_INT bit1 = val & -val;
19881 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19882 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19883 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19884 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
19887 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19888 If EXPAND is true, split rotate-and-mask instructions we generate to
19889 their constituent parts as well (this is used during expand); if DOT
19890 is 1, make the last insn a record-form instruction clobbering the
19891 destination GPR and setting the CC reg (from operands[3]); if 2, set
19892 that GPR as well as the CC reg. */
19894 void
19895 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19897 gcc_assert (!(expand && dot));
19899 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19901 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19902 shift right. This generates better code than doing the masks without
19903 shifts, or shifting first right and then left. */
19904 int nb, ne;
19905 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19907 gcc_assert (mode == DImode);
19909 int shift = 63 - nb;
19910 if (expand)
19912 rtx tmp1 = gen_reg_rtx (DImode);
19913 rtx tmp2 = gen_reg_rtx (DImode);
19914 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19915 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19916 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19918 else
19920 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19921 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19922 emit_move_insn (operands[0], tmp);
19923 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19924 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19926 return;
19929 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19930 that does the rest. */
19931 unsigned HOST_WIDE_INT bit1 = val & -val;
19932 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19933 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19934 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19936 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19937 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
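/* MASK2 is VAL with its lowest hole filled in, and MASK1 is all ones
   except for that hole, so MASK1 & MASK2 == VAL. */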
19939 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19941 /* Two "no-rotate"-and-mask instructions, for SImode. */
19942 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19944 gcc_assert (mode == SImode);
19946 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19947 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19948 emit_move_insn (reg, tmp);
19949 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19950 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19951 return;
19954 gcc_assert (mode == DImode);
19956 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19957 insns; we have to do the first in SImode, because it wraps. */
19958 if (mask2 <= 0xffffffff
19959 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19961 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19962 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19963 GEN_INT (mask1));
19964 rtx reg_low = gen_lowpart (SImode, reg);
19965 emit_move_insn (reg_low, tmp);
19966 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19967 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19968 return;
19971 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19972 at the top end), rotate back and clear the other hole. */
19973 int right = exact_log2 (bit3);
19974 int left = 64 - right;
19976 /* Rotate the mask too. */
19977 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19979 if (expand)
19981 rtx tmp1 = gen_reg_rtx (DImode);
19982 rtx tmp2 = gen_reg_rtx (DImode);
19983 rtx tmp3 = gen_reg_rtx (DImode);
19984 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19985 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19986 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19987 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19989 else
19991 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19992 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19993 emit_move_insn (operands[0], tmp);
19994 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19995 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19996 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
20000 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
20001 for lfq and stfq insns iff the registers are hard registers. */
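/* For example, fp14/fp15 qualify but fp14/fp16 do not; lfq and stfq
   access two consecutive FPRs, hence the adjacency requirement. */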
20003 int
20004 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
20006 /* We might have been passed a SUBREG. */
20007 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
20008 return 0;
20010 /* We might have been passed non-floating-point registers. */
20011 if (!FP_REGNO_P (REGNO (reg1))
20012 || !FP_REGNO_P (REGNO (reg2)))
20013 return 0;
20015 return (REGNO (reg1) == REGNO (reg2) - 1);
20018 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
20019 addr1 and addr2 must be in consecutive memory locations
20020 (addr2 == addr1 + 8). */
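/* For example, (mem (reg 3)) and (mem (plus (reg 3) (const_int 8)))
   qualify; two addresses with different base registers never do. */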
20022 int
20023 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
20025 rtx addr1, addr2;
20026 unsigned int reg1, reg2;
20027 int offset1, offset2;
20029 /* The mems cannot be volatile. */
20030 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
20031 return 0;
20033 addr1 = XEXP (mem1, 0);
20034 addr2 = XEXP (mem2, 0);
20036 /* Extract an offset (if used) from the first addr. */
20037 if (GET_CODE (addr1) == PLUS)
20039 /* If not a REG, return zero. */
20040 if (GET_CODE (XEXP (addr1, 0)) != REG)
20041 return 0;
20042 else
20044 reg1 = REGNO (XEXP (addr1, 0));
20045 /* The offset must be constant! */
20046 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
20047 return 0;
20048 offset1 = INTVAL (XEXP (addr1, 1));
20051 else if (GET_CODE (addr1) != REG)
20052 return 0;
20053 else
20055 reg1 = REGNO (addr1);
20056 /* This was a simple (mem (reg)) expression. Offset is 0. */
20057 offset1 = 0;
20060 /* And now for the second addr. */
20061 if (GET_CODE (addr2) == PLUS)
20063 /* If not a REG, return zero. */
20064 if (GET_CODE (XEXP (addr2, 0)) != REG)
20065 return 0;
20066 else
20068 reg2 = REGNO (XEXP (addr2, 0));
20069 /* The offset must be constant. */
20070 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
20071 return 0;
20072 offset2 = INTVAL (XEXP (addr2, 1));
20075 else if (GET_CODE (addr2) != REG)
20076 return 0;
20077 else
20079 reg2 = REGNO (addr2);
20080 /* This was a simple (mem (reg)) expression. Offset is 0. */
20081 offset2 = 0;
20084 /* Both of these must have the same base register. */
20085 if (reg1 != reg2)
20086 return 0;
20088 /* The offset for the second addr must be 8 more than the first addr. */
20089 if (offset2 != offset1 + 8)
20090 return 0;
20092 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
20093 instructions. */
20094 return 1;
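/* Return a memory rtx to be used when a secondary memory location is
   needed for mode MODE. For SDmode, reuse the stack slot allocated by
   rs6000_alloc_sdmode_stack_slot (applying register eliminations the
   first time through); for all other modes, allocate a fresh local
   stack slot. */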
20098 rtx
20099 rs6000_secondary_memory_needed_rtx (machine_mode mode)
20101 static bool eliminated = false;
20102 rtx ret;
20104 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
20105 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20106 else
20108 rtx mem = cfun->machine->sdmode_stack_slot;
20109 gcc_assert (mem != NULL_RTX);
20111 if (!eliminated)
20113 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
20114 cfun->machine->sdmode_stack_slot = mem;
20115 eliminated = true;
20117 ret = mem;
20120 if (TARGET_DEBUG_ADDR)
20122 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
20123 GET_MODE_NAME (mode));
20124 if (!ret)
20125 fprintf (stderr, "\tNULL_RTX\n");
20126 else
20127 debug_rtx (ret);
20130 return ret;
20133 /* Return the mode to be used for memory when a secondary memory
20134 location is needed. For SDmode values we need to use DDmode; in
20135 all other cases we can use the same mode. */
20136 machine_mode
20137 rs6000_secondary_memory_needed_mode (machine_mode mode)
20139 if (lra_in_progress && mode == SDmode)
20140 return DDmode;
20141 return mode;
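/* Tree walker (for use with walk_gimple_op) that returns the first
   operand whose type has SDmode, or NULL_TREE if there is none; used
   by rs6000_alloc_sdmode_stack_slot below. */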
20144 static tree
20145 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
20147 /* Don't walk into types. */
20148 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
20150 *walk_subtrees = 0;
20151 return NULL_TREE;
20154 switch (TREE_CODE (*tp))
20156 case VAR_DECL:
20157 case PARM_DECL:
20158 case FIELD_DECL:
20159 case RESULT_DECL:
20160 case SSA_NAME:
20161 case REAL_CST:
20162 case MEM_REF:
20163 case VIEW_CONVERT_EXPR:
20164 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
20165 return *tp;
20166 break;
20167 default:
20168 break;
20171 return NULL_TREE;
20174 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
20175 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
20176 only work on the traditional altivec registers, note if an altivec register
20177 was chosen. */
20179 static enum rs6000_reg_type
20180 register_to_reg_type (rtx reg, bool *is_altivec)
20182 HOST_WIDE_INT regno;
20183 enum reg_class rclass;
20185 if (GET_CODE (reg) == SUBREG)
20186 reg = SUBREG_REG (reg);
20188 if (!REG_P (reg))
20189 return NO_REG_TYPE;
20191 regno = REGNO (reg);
20192 if (regno >= FIRST_PSEUDO_REGISTER)
20194 if (!lra_in_progress && !reload_in_progress && !reload_completed)
20195 return PSEUDO_REG_TYPE;
20197 regno = true_regnum (reg);
20198 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
20199 return PSEUDO_REG_TYPE;
20202 gcc_assert (regno >= 0);
20204 if (is_altivec && ALTIVEC_REGNO_P (regno))
20205 *is_altivec = true;
20207 rclass = rs6000_regno_regclass[regno];
20208 return reg_class_to_reg_type[(int)rclass];
20211 /* Helper function to return the cost of adding a TOC entry address. */
20213 static inline int
20214 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
20216 int ret;
20218 if (TARGET_CMODEL != CMODEL_SMALL)
20219 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
20221 else
20222 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
20224 return ret;
20227 /* Helper function for rs6000_secondary_reload to determine whether the memory
20228 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
20229 needs reloading. Return negative if the memory is not handled by the memory
20230 helper functions (so a different reload method should be tried), 0 if no
20231 additional instructions are needed, and positive to give the extra cost for the
20232 memory. */
20234 static int
20235 rs6000_secondary_reload_memory (rtx addr,
20236 enum reg_class rclass,
20237 machine_mode mode)
20239 int extra_cost = 0;
20240 rtx reg, and_arg, plus_arg0, plus_arg1;
20241 addr_mask_type addr_mask;
20242 const char *type = NULL;
20243 const char *fail_msg = NULL;
20245 if (GPR_REG_CLASS_P (rclass))
20246 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20248 else if (rclass == FLOAT_REGS)
20249 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20251 else if (rclass == ALTIVEC_REGS)
20252 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20254 /* For the combined VSX_REGS, turn off Altivec AND -16. */
20255 else if (rclass == VSX_REGS)
20256 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
20257 & ~RELOAD_REG_AND_M16);
20259 /* If the register allocator hasn't made up its mind yet on the register
20260 class to use, settle on defaults to use. */
20261 else if (rclass == NO_REGS)
20263 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
20264 & ~RELOAD_REG_AND_M16);
20266 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
20267 addr_mask &= ~(RELOAD_REG_INDEXED
20268 | RELOAD_REG_PRE_INCDEC
20269 | RELOAD_REG_PRE_MODIFY);
20272 else
20273 addr_mask = 0;
20275 /* If the register isn't valid in this register class, just return now. */
20276 if ((addr_mask & RELOAD_REG_VALID) == 0)
20278 if (TARGET_DEBUG_ADDR)
20280 fprintf (stderr,
20281 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20282 "not valid in class\n",
20283 GET_MODE_NAME (mode), reg_class_names[rclass]);
20284 debug_rtx (addr);
20287 return -1;
20290 switch (GET_CODE (addr))
20292 /* Does the register class support auto update forms for this mode? We
20293 don't need a scratch register, since the powerpc only supports
20294 PRE_INC, PRE_DEC, and PRE_MODIFY. */
20295 case PRE_INC:
20296 case PRE_DEC:
20297 reg = XEXP (addr, 0);
20298 if (!base_reg_operand (reg, GET_MODE (reg)))
20300 fail_msg = "no base register #1";
20301 extra_cost = -1;
20304 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20306 extra_cost = 1;
20307 type = "update";
20309 break;
20311 case PRE_MODIFY:
20312 reg = XEXP (addr, 0);
20313 plus_arg1 = XEXP (addr, 1);
20314 if (!base_reg_operand (reg, GET_MODE (reg))
20315 || GET_CODE (plus_arg1) != PLUS
20316 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
20318 fail_msg = "bad PRE_MODIFY";
20319 extra_cost = -1;
20322 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20324 extra_cost = 1;
20325 type = "update";
20327 break;
20329 /* Do we need to simulate AND -16 to clear the bottom address bits used
20330 in VMX load/stores? Only allow the AND for vector sizes. */
20331 case AND:
20332 and_arg = XEXP (addr, 0);
20333 if (GET_MODE_SIZE (mode) != 16
20334 || GET_CODE (XEXP (addr, 1)) != CONST_INT
20335 || INTVAL (XEXP (addr, 1)) != -16)
20337 fail_msg = "bad Altivec AND #1";
20338 extra_cost = -1;
20341 if (rclass != ALTIVEC_REGS)
20343 if (legitimate_indirect_address_p (and_arg, false))
20344 extra_cost = 1;
20346 else if (legitimate_indexed_address_p (and_arg, false))
20347 extra_cost = 2;
20349 else
20351 fail_msg = "bad Altivec AND #2";
20352 extra_cost = -1;
20355 type = "and";
20357 break;
20359 /* If this is an indirect address, make sure it is a base register. */
20360 case REG:
20361 case SUBREG:
20362 if (!legitimate_indirect_address_p (addr, false))
20364 extra_cost = 1;
20365 type = "move";
20367 break;
20369 /* If this is an indexed address, make sure the register class can handle
20370 indexed addresses for this mode. */
20371 case PLUS:
20372 plus_arg0 = XEXP (addr, 0);
20373 plus_arg1 = XEXP (addr, 1);
20375 /* (plus (plus (reg) (constant)) (constant)) is generated during
20376 push_reload processing, so handle it now. */
20377 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
20379 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20381 extra_cost = 1;
20382 type = "offset";
20386 /* (plus (plus (reg) (constant)) (reg)) is also generated during
20387 push_reload processing, so handle it now. */
20388 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
20390 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20392 extra_cost = 1;
20393 type = "indexed #2";
20397 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
20399 fail_msg = "no base register #2";
20400 extra_cost = -1;
20403 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
20405 if ((addr_mask & RELOAD_REG_INDEXED) == 0
20406 || !legitimate_indexed_address_p (addr, false))
20408 extra_cost = 1;
20409 type = "indexed";
20413 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
20414 && CONST_INT_P (plus_arg1))
20416 if (!quad_address_offset_p (INTVAL (plus_arg1)))
20418 extra_cost = 1;
20419 type = "vector d-form offset";
20423 /* Make sure the register class can handle offset addresses. */
20424 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20426 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20428 extra_cost = 1;
20429 type = "offset #2";
20433 else
20435 fail_msg = "bad PLUS";
20436 extra_cost = -1;
20439 break;
20441 case LO_SUM:
20442 /* Quad offsets are restricted and can't handle normal addresses. */
20443 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20445 extra_cost = -1;
20446 type = "vector d-form lo_sum";
20449 else if (!legitimate_lo_sum_address_p (mode, addr, false))
20451 fail_msg = "bad LO_SUM";
20452 extra_cost = -1;
20455 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20457 extra_cost = 1;
20458 type = "lo_sum";
20460 break;
20462 /* Static addresses need to create a TOC entry. */
20463 case CONST:
20464 case SYMBOL_REF:
20465 case LABEL_REF:
20466 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20468 extra_cost = -1;
20469 type = "vector d-form lo_sum #2";
20472 else
20474 type = "address";
20475 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
20477 break;
20479 /* TOC references look like offsettable memory. */
20480 case UNSPEC:
20481 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
20483 fail_msg = "bad UNSPEC";
20484 extra_cost = -1;
20487 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20489 extra_cost = -1;
20490 type = "vector d-form lo_sum #3";
20493 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20495 extra_cost = 1;
20496 type = "toc reference";
20498 break;
20500 default:
20502 fail_msg = "bad address";
20503 extra_cost = -1;
20507 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
20509 if (extra_cost < 0)
20510 fprintf (stderr,
20511 "rs6000_secondary_reload_memory error: mode = %s, "
20512 "class = %s, addr_mask = '%s', %s\n",
20513 GET_MODE_NAME (mode),
20514 reg_class_names[rclass],
20515 rs6000_debug_addr_mask (addr_mask, false),
20516 (fail_msg != NULL) ? fail_msg : "<bad address>");
20518 else
20519 fprintf (stderr,
20520 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20521 "addr_mask = '%s', extra cost = %d, %s\n",
20522 GET_MODE_NAME (mode),
20523 reg_class_names[rclass],
20524 rs6000_debug_addr_mask (addr_mask, false),
20525 extra_cost,
20526 (type) ? type : "<none>");
20528 debug_rtx (addr);
20531 return extra_cost;
20534 /* Helper function for rs6000_secondary_reload to return true if a move to a
20535 different register class is really a simple move. */
20537 static bool
20538 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20539 enum rs6000_reg_type from_type,
20540 machine_mode mode)
20542 int size = GET_MODE_SIZE (mode);
20544 /* Add support for various direct moves available. In this function, we only
20545 look at cases where we don't need any extra registers, and one or more
20546 simple move insns are issued. Originally small integers are not allowed
20547 in FPR/VSX registers. Single precision binary floating point is not a simple
20548 move because we need to convert to the single precision memory layout.
20549 The 4-byte SDmode can be moved. TDmode values are disallowed since they
20550 need special direct move handling, which we do not support yet. */
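/* For example, a DImode move between a GPR and a VSX register on a
   64-bit ISA 2.07 target is a single mtvsrd or mfvsrd and therefore
   simple; an SFmode move is not, because of the memory-format
   conversion described above. */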
20551 if (TARGET_DIRECT_MOVE
20552 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20553 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
20555 if (TARGET_POWERPC64)
20557 /* ISA 2.07: MTVSRD or MFVSRD. */
20558 if (size == 8)
20559 return true;
20561 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
20562 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
20563 return true;
20566 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20567 if (TARGET_VSX_SMALL_INTEGER && mode == SImode)
20568 return true;
20570 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
20571 if (mode == SDmode)
20572 return true;
20575 /* Power6+: MFTGPR or MFFGPR. */
20576 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
20577 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
20578 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20579 return true;
20581 /* Move to/from SPR. */
20582 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
20583 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
20584 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20585 return true;
20587 return false;
20590 /* Direct move helper function for rs6000_secondary_reload; handle all of the
20591 special direct moves that involve allocating an extra register. Return
20592 true if such a helper exists, storing its insn code and extra cost in
20593 SRI; return false otherwise. */
20595 static bool
20596 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
20597 enum rs6000_reg_type from_type,
20598 machine_mode mode,
20599 secondary_reload_info *sri,
20600 bool altivec_p)
20602 bool ret = false;
20603 enum insn_code icode = CODE_FOR_nothing;
20604 int cost = 0;
20605 int size = GET_MODE_SIZE (mode);
20607 if (TARGET_POWERPC64 && size == 16)
20609 /* Handle moving 128-bit values from GPRs to VSX registers on
20610 ISA 2.07 (power8, power9) when running in 64-bit mode using
20611 XXPERMDI to glue the two 64-bit values back together. */
20612 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20614 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
20615 icode = reg_addr[mode].reload_vsx_gpr;
20618 /* Handle moving 128-bit values from VSX registers to GPRs on
20619 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
20620 bottom 64-bit value. */
20621 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20623 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
20624 icode = reg_addr[mode].reload_gpr_vsx;
20628 else if (TARGET_POWERPC64 && mode == SFmode)
20630 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20632 cost = 3; /* xscvdpspn, mfvsrd, and. */
20633 icode = reg_addr[mode].reload_gpr_vsx;
20636 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20638 cost = 2; /* mtvsrwz, xscvspdpn. */
20639 icode = reg_addr[mode].reload_vsx_gpr;
20643 else if (!TARGET_POWERPC64 && size == 8)
20645 /* Handle moving 64-bit values from GPRs to floating point registers on
20646 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
20647 32-bit values back together. Altivec register classes must be handled
20648 specially since a different instruction is used, and the secondary
20649 reload support requires a single instruction class in the scratch
20650 register constraint. However, right now TFmode is not allowed in
20651 Altivec registers, so the pattern will never match. */
20652 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
20654 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
20655 icode = reg_addr[mode].reload_fpr_gpr;
20659 if (icode != CODE_FOR_nothing)
20661 ret = true;
20662 if (sri)
20664 sri->icode = icode;
20665 sri->extra_cost = cost;
20669 return ret;
20672 /* Return whether a move between two register classes can be done either
20673 directly (simple move) or via a pattern that uses a single extra temporary
20674 (using ISA 2.07's direct move in this case). */
20676 static bool
20677 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
20678 enum rs6000_reg_type from_type,
20679 machine_mode mode,
20680 secondary_reload_info *sri,
20681 bool altivec_p)
20683 /* Fall back to load/store reloads if either type is not a register. */
20684 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
20685 return false;
20687 /* If we haven't allocated registers yet, assume the move can be done for the
20688 standard register types. */
20689 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
20690 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
20691 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
20692 return true;
20694 /* A move within the same set of registers is a simple move for non-specialized
20695 registers. */
20696 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
20697 return true;
20699 /* Check whether a simple move can be done directly. */
20700 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
20702 if (sri)
20704 sri->icode = CODE_FOR_nothing;
20705 sri->extra_cost = 0;
20707 return true;
20710 /* Now check if we can do it in a few steps. */
20711 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
20712 altivec_p);
20715 /* Inform reload about cases where moving X with a mode MODE to a register in
20716 RCLASS requires an extra scratch or immediate register. Return the class
20717 needed for the immediate register.
20719 For VSX and Altivec, we may need a register to convert sp+offset into
20720 reg+sp.
20722 For misaligned 64-bit gpr loads and stores we need a register to
20723 convert an offset address to indirect. */
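/* Returns NO_REGS when the reload can be handled here (possibly via
   the helper insn recorded in SRI), otherwise the class of the
   scratch register that reload must allocate. */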
20725 static reg_class_t
20726 rs6000_secondary_reload (bool in_p,
20727 rtx x,
20728 reg_class_t rclass_i,
20729 machine_mode mode,
20730 secondary_reload_info *sri)
20732 enum reg_class rclass = (enum reg_class) rclass_i;
20733 reg_class_t ret = ALL_REGS;
20734 enum insn_code icode;
20735 bool default_p = false;
20736 bool done_p = false;
20738 /* Allow subreg of memory before/during reload. */
20739 bool memory_p = (MEM_P (x)
20740 || (!reload_completed && GET_CODE (x) == SUBREG
20741 && MEM_P (SUBREG_REG (x))));
20743 sri->icode = CODE_FOR_nothing;
20744 sri->t_icode = CODE_FOR_nothing;
20745 sri->extra_cost = 0;
20746 icode = ((in_p)
20747 ? reg_addr[mode].reload_load
20748 : reg_addr[mode].reload_store);
20750 if (REG_P (x) || register_operand (x, mode))
20752 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20753 bool altivec_p = (rclass == ALTIVEC_REGS);
20754 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20756 if (!in_p)
20757 std::swap (to_type, from_type);
20759 /* Can we do a direct move of some sort? */
20760 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20761 altivec_p))
20763 icode = (enum insn_code)sri->icode;
20764 default_p = false;
20765 done_p = true;
20766 ret = NO_REGS;
20770 /* Make sure 0.0 is not reloaded or forced into memory. */
20771 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20773 ret = NO_REGS;
20774 default_p = false;
20775 done_p = true;
20778 /* If this is a scalar floating point value and we want to load it into the
20779 traditional Altivec registers, do it via a traditional floating
20780 point register, unless we have D-form addressing. Also make sure that
20781 non-zero constants use a FPR. */
20782 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20783 && !mode_supports_vmx_dform (mode)
20784 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20785 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20787 ret = FLOAT_REGS;
20788 default_p = false;
20789 done_p = true;
20792 /* Handle reload of load/stores if we have reload helper functions. */
20793 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20795 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20796 mode);
20798 if (extra_cost >= 0)
20800 done_p = true;
20801 ret = NO_REGS;
20802 if (extra_cost > 0)
20804 sri->extra_cost = extra_cost;
20805 sri->icode = icode;
20810 /* Handle unaligned loads and stores of integer registers. */
20811 if (!done_p && TARGET_POWERPC64
20812 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20813 && memory_p
20814 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20816 rtx addr = XEXP (x, 0);
20817 rtx off = address_offset (addr);
20819 if (off != NULL_RTX)
20821 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20822 unsigned HOST_WIDE_INT offset = INTVAL (off);
20824 /* We need a secondary reload when our legitimate_address_p
20825 says the address is good (as otherwise the entire address
20826 will be reloaded), and the offset is not a multiple of
20827 four or we have an address wrap. Address wrap will only
20828 occur for LO_SUMs since legitimate_offset_address_p
20829 rejects addresses for 16-byte mems that will wrap. */
20830 if (GET_CODE (addr) == LO_SUM
20831 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20832 && ((offset & 3) != 0
20833 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20834 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20835 && (offset & 3) != 0))
20837 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20838 if (in_p)
20839 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20840 : CODE_FOR_reload_di_load);
20841 else
20842 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20843 : CODE_FOR_reload_di_store);
20844 sri->extra_cost = 2;
20845 ret = NO_REGS;
20846 done_p = true;
20848 else
20849 default_p = true;
20851 else
20852 default_p = true;
20855 if (!done_p && !TARGET_POWERPC64
20856 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20857 && memory_p
20858 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20860 rtx addr = XEXP (x, 0);
20861 rtx off = address_offset (addr);
20863 if (off != NULL_RTX)
20865 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20866 unsigned HOST_WIDE_INT offset = INTVAL (off);
20868 /* We need a secondary reload when our legitimate_address_p
20869 says the address is good (as otherwise the entire address
20870 will be reloaded), and we have a wrap.
20872 legitimate_lo_sum_address_p allows LO_SUM addresses to
20873 have any offset so test for wrap in the low 16 bits.
20875 legitimate_offset_address_p checks for the range
20876 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20877 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20878 [0x7ff4,0x7fff] respectively, so test for the
20879 intersection of these ranges, [0x7ffc,0x7fff] and
20880 [0x7ff4,0x7ff7] respectively.
20882 Note that the address we see here may have been
20883 manipulated by legitimize_reload_address. */
20884 if (GET_CODE (addr) == LO_SUM
20885 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20886 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20888 if (in_p)
20889 sri->icode = CODE_FOR_reload_si_load;
20890 else
20891 sri->icode = CODE_FOR_reload_si_store;
20892 sri->extra_cost = 2;
20893 ret = NO_REGS;
20894 done_p = true;
20896 else
20897 default_p = true;
20899 else
20900 default_p = true;
20903 if (!done_p)
20904 default_p = true;
20906 if (default_p)
20907 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20909 gcc_assert (ret != ALL_REGS);
20911 if (TARGET_DEBUG_ADDR)
20913 fprintf (stderr,
20914 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20915 "mode = %s",
20916 reg_class_names[ret],
20917 in_p ? "true" : "false",
20918 reg_class_names[rclass],
20919 GET_MODE_NAME (mode));
20921 if (reload_completed)
20922 fputs (", after reload", stderr);
20924 if (!done_p)
20925 fputs (", done_p not set", stderr);
20927 if (default_p)
20928 fputs (", default secondary reload", stderr);
20930 if (sri->icode != CODE_FOR_nothing)
20931 fprintf (stderr, ", reload func = %s, extra cost = %d",
20932 insn_data[sri->icode].name, sri->extra_cost);
20934 else if (sri->extra_cost > 0)
20935 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20937 fputs ("\n", stderr);
20938 debug_rtx (x);
20941 return ret;
20944 /* Better tracing for rs6000_secondary_reload_inner. */
20946 static void
20947 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20948 bool store_p)
20950 rtx set, clobber;
20952 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20954 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20955 store_p ? "store" : "load");
20957 if (store_p)
20958 set = gen_rtx_SET (mem, reg);
20959 else
20960 set = gen_rtx_SET (reg, mem);
20962 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20963 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20966 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20967 ATTRIBUTE_NORETURN;
20969 static void
20970 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20971 bool store_p)
20973 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20974 gcc_unreachable ();
20977 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20978 reload helper functions. These were identified in
20979 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20980 reload, it calls the insns:
20981 reload_<RELOAD:mode>_<P:mptrsize>_store
20982 reload_<RELOAD:mode>_<P:mptrsize>_load
20984 which in turn calls this function, to do whatever is necessary to create
20985 valid addresses. */
20987 void
20988 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20990 int regno = true_regnum (reg);
20991 machine_mode mode = GET_MODE (reg);
20992 addr_mask_type addr_mask;
20993 rtx addr;
20994 rtx new_addr;
20995 rtx op_reg, op0, op1;
20996 rtx and_op;
20997 rtx cc_clobber;
20998 rtvec rv;
21000 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
21001 || !base_reg_operand (scratch, GET_MODE (scratch)))
21002 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21004 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
21005 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21007 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
21008 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21010 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
21011 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21013 else
21014 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21016 /* Make sure the mode is valid in this register class. */
21017 if ((addr_mask & RELOAD_REG_VALID) == 0)
21018 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21020 if (TARGET_DEBUG_ADDR)
21021 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
21023 new_addr = addr = XEXP (mem, 0);
21024 switch (GET_CODE (addr))
21026 /* Does the register class support auto update forms for this mode? If
21027 not, do the update now. We don't need a scratch register, since the
21028 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
21029 case PRE_INC:
21030 case PRE_DEC:
21031 op_reg = XEXP (addr, 0);
21032 if (!base_reg_operand (op_reg, Pmode))
21033 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21035 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21037 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
21038 new_addr = op_reg;
21040 break;
21042 case PRE_MODIFY:
21043 op0 = XEXP (addr, 0);
21044 op1 = XEXP (addr, 1);
21045 if (!base_reg_operand (op0, Pmode)
21046 || GET_CODE (op1) != PLUS
21047 || !rtx_equal_p (op0, XEXP (op1, 0)))
21048 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21050 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21052 emit_insn (gen_rtx_SET (op0, op1));
21053 new_addr = reg;
21055 break;
21057 /* Do we need to simulate AND -16 to clear the bottom address bits used
21058 in VMX load/stores? */
21059 case AND:
21060 op0 = XEXP (addr, 0);
21061 op1 = XEXP (addr, 1);
21062 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
21064 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
21065 op_reg = op0;
21067 else if (GET_CODE (op1) == PLUS)
21069 emit_insn (gen_rtx_SET (scratch, op1));
21070 op_reg = scratch;
21073 else
21074 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21076 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
21077 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
21078 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
21079 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
21080 new_addr = scratch;
21082 break;
21084 /* If this is an indirect address, make sure it is a base register. */
21085 case REG:
21086 case SUBREG:
21087 if (!base_reg_operand (addr, GET_MODE (addr)))
21089 emit_insn (gen_rtx_SET (scratch, addr));
21090 new_addr = scratch;
21092 break;
21094 /* If this is an indexed address, make sure the register class can handle
21095 indexed addresses for this mode. */
21096 case PLUS:
21097 op0 = XEXP (addr, 0);
21098 op1 = XEXP (addr, 1);
21099 if (!base_reg_operand (op0, Pmode))
21100 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21102 else if (int_reg_operand (op1, Pmode))
21104 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21106 emit_insn (gen_rtx_SET (scratch, addr));
21107 new_addr = scratch;
21111 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
21113 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
21114 || !quad_address_p (addr, mode, false))
21116 emit_insn (gen_rtx_SET (scratch, addr));
21117 new_addr = scratch;
21121 /* Make sure the register class can handle offset addresses. */
21122 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
21124 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21126 emit_insn (gen_rtx_SET (scratch, addr));
21127 new_addr = scratch;
21131 else
21132 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21134 break;
21136 case LO_SUM:
21137 op0 = XEXP (addr, 0);
21138 op1 = XEXP (addr, 1);
21139 if (!base_reg_operand (op0, Pmode))
21140 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21142 else if (int_reg_operand (op1, Pmode))
21144 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21146 emit_insn (gen_rtx_SET (scratch, addr));
21147 new_addr = scratch;
21151 /* Quad offsets are restricted and can't handle normal addresses. */
21152 else if (mode_supports_vsx_dform_quad (mode))
21154 emit_insn (gen_rtx_SET (scratch, addr));
21155 new_addr = scratch;
21158 /* Make sure the register class can handle offset addresses. */
21159 else if (legitimate_lo_sum_address_p (mode, addr, false))
21161 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21163 emit_insn (gen_rtx_SET (scratch, addr));
21164 new_addr = scratch;
21168 else
21169 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21171 break;
21173 case SYMBOL_REF:
21174 case CONST:
21175 case LABEL_REF:
21176 rs6000_emit_move (scratch, addr, Pmode);
21177 new_addr = scratch;
21178 break;
21180 default:
21181 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
21184 /* Adjust the address if it changed. */
21185 if (addr != new_addr)
21187 mem = replace_equiv_address_nv (mem, new_addr);
21188 if (TARGET_DEBUG_ADDR)
21189 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
21192 /* Now create the move. */
21193 if (store_p)
21194 emit_insn (gen_rtx_SET (mem, reg));
21195 else
21196 emit_insn (gen_rtx_SET (reg, mem));
21198 return;
21201 /* Convert reloads involving 64-bit gprs and misaligned offset
21202 addressing, or multiple 32-bit gprs and offsets that are too large,
21203 to use indirect addressing. */
21205 void
21206 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
21208 int regno = true_regnum (reg);
21209 enum reg_class rclass;
21210 rtx addr;
21211 rtx scratch_or_premodify = scratch;
21213 if (TARGET_DEBUG_ADDR)
21215 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
21216 store_p ? "store" : "load");
21217 fprintf (stderr, "reg:\n");
21218 debug_rtx (reg);
21219 fprintf (stderr, "mem:\n");
21220 debug_rtx (mem);
21221 fprintf (stderr, "scratch:\n");
21222 debug_rtx (scratch);
21225 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
21226 gcc_assert (GET_CODE (mem) == MEM);
21227 rclass = REGNO_REG_CLASS (regno);
21228 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
21229 addr = XEXP (mem, 0);
21231 if (GET_CODE (addr) == PRE_MODIFY)
21233 gcc_assert (REG_P (XEXP (addr, 0))
21234 && GET_CODE (XEXP (addr, 1)) == PLUS
21235 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
21236 scratch_or_premodify = XEXP (addr, 0);
21237 if (!HARD_REGISTER_P (scratch_or_premodify))
21238 /* If we have a pseudo here then reload will have arranged
21239 to have it replaced, but only in the original insn.
21240 Use the replacement here too. */
21241 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
21243 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
21244 expressions from the original insn, without unsharing them.
21245 Any RTL that points into the original insn will of course
21246 have register replacements applied. That is why we don't
21247 need to look for replacements under the PLUS. */
21248 addr = XEXP (addr, 1);
21250 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
21252 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
21254 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
21256 /* Now create the move. */
21257 if (store_p)
21258 emit_insn (gen_rtx_SET (mem, reg));
21259 else
21260 emit_insn (gen_rtx_SET (reg, mem));
21262 return;
21265 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
21266 this function has any SDmode references. If we are on a power7 or later, we
21267 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
21268 can load/store the value. */
21270 static void
21271 rs6000_alloc_sdmode_stack_slot (void)
21273 tree t;
21274 basic_block bb;
21275 gimple_stmt_iterator gsi;
21277 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
21278 /* We use a different approach for dealing with the secondary
21279 memory in LRA. */
21280 if (ira_use_lra_p)
21281 return;
21283 if (TARGET_NO_SDMODE_STACK)
21284 return;
21286 FOR_EACH_BB_FN (bb, cfun)
21287 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21289 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
21290 if (ret)
21292 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21293 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21294 SDmode, 0);
21295 return;
21299 /* Check for any SDmode parameters of the function. */
21300 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
21302 if (TREE_TYPE (t) == error_mark_node)
21303 continue;
21305 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
21306 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
21308 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
21309 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
21310 SDmode, 0);
21311 return;
21316 static void
21317 rs6000_instantiate_decls (void)
21319 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
21320 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
21323 /* Given an rtx X being reloaded into a reg required to be
21324 in class CLASS, return the class of reg to actually use.
21325 In general this is just CLASS; but on some machines
21326 in some cases it is preferable to use a more restrictive class.
21328 On the RS/6000, we have to return NO_REGS when we want to reload a
21329 floating-point CONST_DOUBLE to force it to be copied to memory.
21331 We also don't want to reload integer values into floating-point
21332 registers if we can at all help it. In fact, this can
21333 cause reload to die, if it tries to generate a reload of CTR
21334 into a FP register and discovers it doesn't have the memory location
21335 required.
21337 ??? Would it be a good idea to have reload do the converse, that is
21338 try to reload floating modes into FP registers if possible?
21339 */
21341 static enum reg_class
21342 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
21344 machine_mode mode = GET_MODE (x);
21345 bool is_constant = CONSTANT_P (x);
21347 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
21348 reload class for it. */
21349 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21350 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
21351 return NO_REGS;
21353 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
21354 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
21355 return NO_REGS;
21357 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
21358 the reloading of address expressions using PLUS into floating point
21359 registers. */
21360 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
21362 if (is_constant)
21364 /* Zero is always allowed in all VSX registers. */
21365 if (x == CONST0_RTX (mode))
21366 return rclass;
21368 /* If this is a vector constant that can be formed with a few Altivec
21369 instructions, we want altivec registers. */
21370 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
21371 return ALTIVEC_REGS;
21373 /* Force constant to memory. */
21374 return NO_REGS;
21377 /* D-form addressing can easily reload the value. */
21378 if (mode_supports_vmx_dform (mode)
21379 || mode_supports_vsx_dform_quad (mode))
21380 return rclass;
21382 /* If this is a scalar floating point value and we don't have D-form
21383 addressing, prefer the traditional floating point registers so that we
21384 can use D-form (register+offset) addressing. */
21385 if (rclass == VSX_REGS
21386 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
21387 return FLOAT_REGS;
21389 /* Prefer the Altivec registers if Altivec is handling the vector
21390 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
21391 loads. */
21392 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
21393 || mode == V1TImode)
21394 return ALTIVEC_REGS;
21396 return rclass;
21399 if (is_constant || GET_CODE (x) == PLUS)
21401 if (reg_class_subset_p (GENERAL_REGS, rclass))
21402 return GENERAL_REGS;
21403 if (reg_class_subset_p (BASE_REGS, rclass))
21404 return BASE_REGS;
21405 return NO_REGS;
21408 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21409 return GENERAL_REGS;
21411 return rclass;
21414 /* Debug version of rs6000_preferred_reload_class. */
21415 static enum reg_class
21416 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
21418 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
21420 fprintf (stderr,
21421 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
21422 "mode = %s, x:\n",
21423 reg_class_names[ret], reg_class_names[rclass],
21424 GET_MODE_NAME (GET_MODE (x)));
21425 debug_rtx (x);
21427 return ret;
21430 /* If we are copying between FP or AltiVec registers and anything else, we need
21431 a memory location. The exception is when we are targeting ppc64 and the
21432 fpr<->gpr move instructions are available. Also, under VSX, you
21433 can copy vector registers from the FP register set to the Altivec register
21434 set and vice versa. */
21436 static bool
21437 rs6000_secondary_memory_needed (enum reg_class from_class,
21438 enum reg_class to_class,
21439 machine_mode mode)
21441 enum rs6000_reg_type from_type, to_type;
21442 bool altivec_p = ((from_class == ALTIVEC_REGS)
21443 || (to_class == ALTIVEC_REGS));
21445 /* If a simple/direct move is available, we don't need secondary memory. */
21446 from_type = reg_class_to_reg_type[(int)from_class];
21447 to_type = reg_class_to_reg_type[(int)to_class];
21449 if (rs6000_secondary_reload_move (to_type, from_type, mode,
21450 (secondary_reload_info *)0, altivec_p))
21451 return false;
21453 /* If we have a floating point or vector register class, we need to use
21454 memory to transfer the data. */
21455 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21456 return true;
21458 return false;
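/* Rough example: a DImode copy between a GPR and an FPR can use the
   direct-move instructions (mfvsrd/mtvsrd) where they exist, in which case
   rs6000_secondary_reload_move succeeds and no stack slot is required;
   without direct moves the copy must bounce through memory, so this
   function returns true.  */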
21461 /* Debug version of rs6000_secondary_memory_needed. */
21462 static bool
21463 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21464 enum reg_class to_class,
21465 machine_mode mode)
21467 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21469 fprintf (stderr,
21470 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21471 "to_class = %s, mode = %s\n",
21472 ret ? "true" : "false",
21473 reg_class_names[from_class],
21474 reg_class_names[to_class],
21475 GET_MODE_NAME (mode));
21477 return ret;
21480 /* Return the register class of a scratch register needed to copy IN into
21481 or out of a register in RCLASS in MODE. If it can be done directly,
21482 NO_REGS is returned. */
21484 static enum reg_class
21485 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
21486 rtx in)
21488 int regno;
21490 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
21491 #if TARGET_MACHO
21492 && MACHOPIC_INDIRECT
21493 #endif
21496 /* We cannot copy a symbolic operand directly into anything
21497 other than BASE_REGS for TARGET_ELF. So indicate that a
21498 register from BASE_REGS is needed as an intermediate
21499 register.
21501 On Darwin, pic addresses require a load from memory, which
21502 needs a base register. */
21503 if (rclass != BASE_REGS
21504 && (GET_CODE (in) == SYMBOL_REF
21505 || GET_CODE (in) == HIGH
21506 || GET_CODE (in) == LABEL_REF
21507 || GET_CODE (in) == CONST))
21508 return BASE_REGS;
21511 if (GET_CODE (in) == REG)
21513 regno = REGNO (in);
21514 if (regno >= FIRST_PSEUDO_REGISTER)
21516 regno = true_regnum (in);
21517 if (regno >= FIRST_PSEUDO_REGISTER)
21518 regno = -1;
21521 else if (GET_CODE (in) == SUBREG)
21523 regno = true_regnum (in);
21524 if (regno >= FIRST_PSEUDO_REGISTER)
21525 regno = -1;
21527 else
21528 regno = -1;
21530 /* If we have VSX register moves, prefer moving scalar values between
21531 Altivec registers and GPR by going via an FPR (and then via memory)
21532 instead of reloading the secondary memory address for Altivec moves. */
21533 if (TARGET_VSX
21534 && GET_MODE_SIZE (mode) < 16
21535 && !mode_supports_vmx_dform (mode)
21536 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
21537 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
21538 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21539 && (regno >= 0 && INT_REGNO_P (regno)))))
21540 return FLOAT_REGS;
21542 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
21543 into anything. */
21544 if (rclass == GENERAL_REGS || rclass == BASE_REGS
21545 || (regno >= 0 && INT_REGNO_P (regno)))
21546 return NO_REGS;
21548 /* Constants, memory, and VSX registers can go into VSX registers (both the
21549 traditional floating point and the altivec registers). */
21550 if (rclass == VSX_REGS
21551 && (regno == -1 || VSX_REGNO_P (regno)))
21552 return NO_REGS;
21554 /* Constants, memory, and FP registers can go into FP registers. */
21555 if ((regno == -1 || FP_REGNO_P (regno))
21556 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21557 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21559 /* Memory, and AltiVec registers can go into AltiVec registers. */
21560 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21561 && rclass == ALTIVEC_REGS)
21562 return NO_REGS;
21564 /* We can copy among the CR registers. */
21565 if ((rclass == CR_REGS || rclass == CR0_REGS)
21566 && regno >= 0 && CR_REGNO_P (regno))
21567 return NO_REGS;
21569 /* Otherwise, we need GENERAL_REGS. */
21570 return GENERAL_REGS;
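/* For instance, copying (symbol_ref "x") into FLOAT_REGS on ELF needs a
   BASE_REGS scratch (returned above) to materialize the address before the
   load, while a GPR-to-GPR copy needs no scratch and yields NO_REGS.  */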
21573 /* Debug version of rs6000_secondary_reload_class. */
21574 static enum reg_class
21575 rs6000_debug_secondary_reload_class (enum reg_class rclass,
21576 machine_mode mode, rtx in)
21578 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
21579 fprintf (stderr,
21580 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
21581 "mode = %s, input rtx:\n",
21582 reg_class_names[ret], reg_class_names[rclass],
21583 GET_MODE_NAME (mode));
21584 debug_rtx (in);
21586 return ret;
21589 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
21591 static bool
21592 rs6000_cannot_change_mode_class (machine_mode from,
21593 machine_mode to,
21594 enum reg_class rclass)
21596 unsigned from_size = GET_MODE_SIZE (from);
21597 unsigned to_size = GET_MODE_SIZE (to);
21599 if (from_size != to_size)
21601 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21603 if (reg_classes_intersect_p (xclass, rclass))
21605 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21606 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21607 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
21608 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
21610 /* Don't allow 64-bit types to overlap with 128-bit types that take a
21611 single register under VSX because the scalar part of the register
21612 is in the upper 64-bits, and not the lower 64-bits. Types like
21613 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
21614 IEEE floating point can't overlap, and neither can small
21615 values. */
21617 if (to_float128_vector_p && from_float128_vector_p)
21618 return false;
21620 else if (to_float128_vector_p || from_float128_vector_p)
21621 return true;
21623 /* TDmode in floating-mode registers must always go into a register
21624 pair with the most significant word in the even-numbered register
21625 to match ISA requirements. In little-endian mode, this does not
21626 match subreg numbering, so we cannot allow subregs. */
21627 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21628 return true;
21630 if (from_size < 8 || to_size < 8)
21631 return true;
21633 if (from_size == 8 && (8 * to_nregs) != to_size)
21634 return true;
21636 if (to_size == 8 && (8 * from_nregs) != from_size)
21637 return true;
21639 return false;
21641 else
21642 return false;
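/* Each clause below uses the ((to) == M) + ((from) == M) == 1 idiom, which
   is 1 exactly when one (but not both) of the two modes is M, i.e. the
   mode change actually crosses into or out of that mode.  */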
21645 if (TARGET_E500_DOUBLE
21646 && ((((to) == DFmode) + ((from) == DFmode)) == 1
21647 || (((to) == TFmode) + ((from) == TFmode)) == 1
21648 || (((to) == IFmode) + ((from) == IFmode)) == 1
21649 || (((to) == KFmode) + ((from) == KFmode)) == 1
21650 || (((to) == DDmode) + ((from) == DDmode)) == 1
21651 || (((to) == TDmode) + ((from) == TDmode)) == 1
21652 || (((to) == DImode) + ((from) == DImode)) == 1))
21653 return true;
21655 /* Since the VSX register set includes traditional floating point registers
21656 and altivec registers, just check for the size being different instead of
21657 trying to check whether the modes are vector modes. Otherwise it won't
21658 allow, say, DF and DI to change classes. For types like TFmode and TDmode
21659 that take 2 64-bit registers, rather than a single 128-bit register, don't
21660 allow subregs of those types to other 128-bit types. */
21661 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21663 unsigned num_regs = (from_size + 15) / 16;
21664 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21665 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21666 return true;
21668 return (from_size != 8 && from_size != 16);
21671 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21672 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21673 return true;
21675 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
21676 && reg_classes_intersect_p (GENERAL_REGS, rclass))
21677 return true;
21679 return false;
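/* Two concrete cases of the size checks above: a DImode subreg of a KFmode
   value in a VSX register is rejected, since the 64-bit scalar part sits in
   the upper half of the 128-bit register; a DFmode/DImode subreg pairing is
   allowed, both being 8-byte scalars in a single register.  */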
21682 /* Debug version of rs6000_cannot_change_mode_class. */
21683 static bool
21684 rs6000_debug_cannot_change_mode_class (machine_mode from,
21685 machine_mode to,
21686 enum reg_class rclass)
21688 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
21690 fprintf (stderr,
21691 "rs6000_cannot_change_mode_class, return %s, from = %s, "
21692 "to = %s, rclass = %s\n",
21693 ret ? "true" : "false",
21694 GET_MODE_NAME (from), GET_MODE_NAME (to),
21695 reg_class_names[rclass]);
21697 return ret;
21700 /* Return a string to do a move operation of 128 bits of data. */
21702 const char *
21703 rs6000_output_move_128bit (rtx operands[])
21705 rtx dest = operands[0];
21706 rtx src = operands[1];
21707 machine_mode mode = GET_MODE (dest);
21708 int dest_regno;
21709 int src_regno;
21710 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21711 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21713 if (REG_P (dest))
21715 dest_regno = REGNO (dest);
21716 dest_gpr_p = INT_REGNO_P (dest_regno);
21717 dest_fp_p = FP_REGNO_P (dest_regno);
21718 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21719 dest_vsx_p = dest_fp_p | dest_vmx_p;
21721 else
21723 dest_regno = -1;
21724 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21727 if (REG_P (src))
21729 src_regno = REGNO (src);
21730 src_gpr_p = INT_REGNO_P (src_regno);
21731 src_fp_p = FP_REGNO_P (src_regno);
21732 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21733 src_vsx_p = src_fp_p | src_vmx_p;
21735 else
21737 src_regno = -1;
21738 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21741 /* Register moves. */
21742 if (dest_regno >= 0 && src_regno >= 0)
21744 if (dest_gpr_p)
21746 if (src_gpr_p)
21747 return "#";
21749 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21750 return (WORDS_BIG_ENDIAN
21751 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21752 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21754 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21755 return "#";
21758 else if (TARGET_VSX && dest_vsx_p)
21760 if (src_vsx_p)
21761 return "xxlor %x0,%x1,%x1";
21763 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21764 return (WORDS_BIG_ENDIAN
21765 ? "mtvsrdd %x0,%1,%L1"
21766 : "mtvsrdd %x0,%L1,%1");
21768 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21769 return "#";
21772 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21773 return "vor %0,%1,%1";
21775 else if (dest_fp_p && src_fp_p)
21776 return "#";
21779 /* Loads. */
21780 else if (dest_regno >= 0 && MEM_P (src))
21782 if (dest_gpr_p)
21784 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21785 return "lq %0,%1";
21786 else
21787 return "#";
21790 else if (TARGET_ALTIVEC && dest_vmx_p
21791 && altivec_indexed_or_indirect_operand (src, mode))
21792 return "lvx %0,%y1";
21794 else if (TARGET_VSX && dest_vsx_p)
21796 if (mode_supports_vsx_dform_quad (mode)
21797 && quad_address_p (XEXP (src, 0), mode, true))
21798 return "lxv %x0,%1";
21800 else if (TARGET_P9_VECTOR)
21801 return "lxvx %x0,%y1";
21803 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21804 return "lxvw4x %x0,%y1";
21806 else
21807 return "lxvd2x %x0,%y1";
21810 else if (TARGET_ALTIVEC && dest_vmx_p)
21811 return "lvx %0,%y1";
21813 else if (dest_fp_p)
21814 return "#";
21817 /* Stores. */
21818 else if (src_regno >= 0 && MEM_P (dest))
21820 if (src_gpr_p)
21822 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21823 return "stq %1,%0";
21824 else
21825 return "#";
21828 else if (TARGET_ALTIVEC && src_vmx_p
21829 && altivec_indexed_or_indirect_operand (dest, mode))
21830 return "stvx %1,%y0";
21832 else if (TARGET_VSX && src_vsx_p)
21834 if (mode_supports_vsx_dform_quad (mode)
21835 && quad_address_p (XEXP (dest, 0), mode, true))
21836 return "stxv %x1,%0";
21838 else if (TARGET_P9_VECTOR)
21839 return "stxvx %x1,%y0";
21841 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21842 return "stxvw4x %x1,%y0";
21844 else
21845 return "stxvd2x %x1,%y0";
21848 else if (TARGET_ALTIVEC && src_vmx_p)
21849 return "stvx %1,%y0";
21851 else if (src_fp_p)
21852 return "#";
21855 /* Constants. */
21856 else if (dest_regno >= 0
21857 && (GET_CODE (src) == CONST_INT
21858 || GET_CODE (src) == CONST_WIDE_INT
21859 || GET_CODE (src) == CONST_DOUBLE
21860 || GET_CODE (src) == CONST_VECTOR))
21862 if (dest_gpr_p)
21863 return "#";
21865 else if ((dest_vmx_p && TARGET_ALTIVEC)
21866 || (dest_vsx_p && TARGET_VSX))
21867 return output_vec_const_move (operands);
21870 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
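/* Sample templates chosen above: a VSX-to-VSX register copy emits
   "xxlor %x0,%x1,%x1"; a GPR-to-GPR copy returns "#" so the move is split
   into word moves after reload (see rs6000_split_128bit_ok_p below); and a
   load with quad D-form addressing can use "lxv %x0,%1".  */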
21873 /* Validate a 128-bit move. */
21874 bool
21875 rs6000_move_128bit_ok_p (rtx operands[])
21877 machine_mode mode = GET_MODE (operands[0]);
21878 return (gpc_reg_operand (operands[0], mode)
21879 || gpc_reg_operand (operands[1], mode));
21882 /* Return true if a 128-bit move needs to be split. */
21883 bool
21884 rs6000_split_128bit_ok_p (rtx operands[])
21886 if (!reload_completed)
21887 return false;
21889 if (!gpr_or_gpr_p (operands[0], operands[1]))
21890 return false;
21892 if (quad_load_store_p (operands[0], operands[1]))
21893 return false;
21895 return true;
21899 /* Given a comparison operation, return the bit number in CCR to test. We
21900 know this is a valid comparison.
21902 SCC_P is 1 if this is for an scc. That means that %D will have been
21903 used instead of %C, so the bits will be in different places.
21905 Return -1 if OP isn't a valid comparison for some reason. */
21907 int
21908 ccr_bit (rtx op, int scc_p)
21910 enum rtx_code code = GET_CODE (op);
21911 machine_mode cc_mode;
21912 int cc_regnum;
21913 int base_bit;
21914 rtx reg;
21916 if (!COMPARISON_P (op))
21917 return -1;
21919 reg = XEXP (op, 0);
21921 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21923 cc_mode = GET_MODE (reg);
21924 cc_regnum = REGNO (reg);
21925 base_bit = 4 * (cc_regnum - CR0_REGNO);
21927 validate_condition_mode (code, cc_mode);
21929 /* When generating a sCOND operation, only positive conditions are
21930 allowed. */
21931 gcc_assert (!scc_p
21932 || code == EQ || code == GT || code == LT || code == UNORDERED
21933 || code == GTU || code == LTU);
21935 switch (code)
21937 case NE:
21938 return scc_p ? base_bit + 3 : base_bit + 2;
21939 case EQ:
21940 return base_bit + 2;
21941 case GT: case GTU: case UNLE:
21942 return base_bit + 1;
21943 case LT: case LTU: case UNGE:
21944 return base_bit;
21945 case ORDERED: case UNORDERED:
21946 return base_bit + 3;
21948 case GE: case GEU:
21949 /* If scc, we will have done a cror to put the bit in the
21950 unordered position. So test that bit. For integer, this is ! LT
21951 unless this is an scc insn. */
21952 return scc_p ? base_bit + 3 : base_bit;
21954 case LE: case LEU:
21955 return scc_p ? base_bit + 3 : base_bit + 1;
21957 default:
21958 gcc_unreachable ();
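/* Worked example: for a GT comparison in CR field 2, base_bit is 4 * 2 = 8
   and the function returns bit 9 (base_bit + 1); for an scc GE, the cror
   mentioned above has put the result in the unordered slot, so bit 11
   (base_bit + 3) is returned instead.  */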
21962 /* Return the GOT register. */
21964 rtx
21965 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21967 /* The second flow pass currently (June 1999) can't update
21968 regs_ever_live without disturbing other parts of the compiler, so
21969 update it here to make the prolog/epilogue code happy. */
21970 if (!can_create_pseudo_p ()
21971 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21972 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21974 crtl->uses_pic_offset_table = 1;
21976 return pic_offset_table_rtx;
21979 static rs6000_stack_t stack_info;
21981 /* Function to init struct machine_function.
21982 This will be called, via a pointer variable,
21983 from push_function_context. */
21985 static struct machine_function *
21986 rs6000_init_machine_status (void)
21988 stack_info.reload_completed = 0;
21989 return ggc_cleared_alloc<machine_function> ();
21992 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21994 /* Write out a function code label. */
21996 void
21997 rs6000_output_function_entry (FILE *file, const char *fname)
21999 if (fname[0] != '.')
22001 switch (DEFAULT_ABI)
22003 default:
22004 gcc_unreachable ();
22006 case ABI_AIX:
22007 if (DOT_SYMBOLS)
22008 putc ('.', file);
22009 else
22010 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
22011 break;
22013 case ABI_ELFv2:
22014 case ABI_V4:
22015 case ABI_DARWIN:
22016 break;
22020 RS6000_OUTPUT_BASENAME (file, fname);
22023 /* Print an operand. Recognize special options, documented below. */
22025 #if TARGET_ELF
22026 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
22027 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
22028 #else
22029 #define SMALL_DATA_RELOC "sda21"
22030 #define SMALL_DATA_REG 0
22031 #endif
22033 void
22034 print_operand (FILE *file, rtx x, int code)
22036 int i;
22037 unsigned HOST_WIDE_INT uval;
22039 switch (code)
22041 /* %a is output_address. */
22043 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
22044 output_operand. */
22046 case 'D':
22047 /* Like 'J' but get to the GT bit only. */
22048 gcc_assert (REG_P (x));
22050 /* Bit 1 is GT bit. */
22051 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
22053 /* Add one for shift count in rlinm for scc. */
22054 fprintf (file, "%d", i + 1);
22055 return;
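/* e.g. for CR field 1 the GT bit is 4 * 1 + 1 = 5, so %D prints 6, the
   shift count used by the scc rlinm sequence to move that bit into the
   low-order position.  */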
22057 case 'e':
22058 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
22059 if (! INT_P (x))
22061 output_operand_lossage ("invalid %%e value");
22062 return;
22065 uval = INTVAL (x);
22066 if ((uval & 0xffff) == 0 && uval != 0)
22067 putc ('s', file);
22068 return;
22070 case 'E':
22071 /* X is a CR register. Print the number of the EQ bit of the CR */
22072 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22073 output_operand_lossage ("invalid %%E value");
22074 else
22075 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
22076 return;
22078 case 'f':
22079 /* X is a CR register. Print the shift count needed to move it
22080 to the high-order four bits. */
22081 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22082 output_operand_lossage ("invalid %%f value");
22083 else
22084 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
22085 return;
22087 case 'F':
22088 /* Similar, but print the count for the rotate in the opposite
22089 direction. */
22090 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22091 output_operand_lossage ("invalid %%F value");
22092 else
22093 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
22094 return;
22096 case 'G':
22097 /* X is a constant integer. If it is negative, print "m",
22098 otherwise print "z". This is to make an aze or ame insn. */
22099 if (GET_CODE (x) != CONST_INT)
22100 output_operand_lossage ("invalid %%G value");
22101 else if (INTVAL (x) >= 0)
22102 putc ('z', file);
22103 else
22104 putc ('m', file);
22105 return;
22107 case 'h':
22108 /* If constant, output low-order five bits. Otherwise, write
22109 normally. */
22110 if (INT_P (x))
22111 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
22112 else
22113 print_operand (file, x, 0);
22114 return;
22116 case 'H':
22117 /* If constant, output low-order six bits. Otherwise, write
22118 normally. */
22119 if (INT_P (x))
22120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
22121 else
22122 print_operand (file, x, 0);
22123 return;
22125 case 'I':
22126 /* Print `i' if this is a constant, else nothing. */
22127 if (INT_P (x))
22128 putc ('i', file);
22129 return;
22131 case 'j':
22132 /* Write the bit number in CCR for jump. */
22133 i = ccr_bit (x, 0);
22134 if (i == -1)
22135 output_operand_lossage ("invalid %%j code");
22136 else
22137 fprintf (file, "%d", i);
22138 return;
22140 case 'J':
22141 /* Similar, but add one for shift count in rlinm for scc and pass
22142 scc flag to `ccr_bit'. */
22143 i = ccr_bit (x, 1);
22144 if (i == -1)
22145 output_operand_lossage ("invalid %%J code");
22146 else
22147 /* If we want bit 31, write a shift count of zero, not 32. */
22148 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22149 return;
22151 case 'k':
22152 /* X must be a constant. Write the 1's complement of the
22153 constant. */
22154 if (! INT_P (x))
22155 output_operand_lossage ("invalid %%k value");
22156 else
22157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
22158 return;
22160 case 'K':
22161 /* X must be a symbolic constant on ELF. Write an
22162 expression suitable for an 'addi' that adds in the low 16
22163 bits of the MEM. */
22164 if (GET_CODE (x) == CONST)
22166 if (GET_CODE (XEXP (x, 0)) != PLUS
22167 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
22168 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
22169 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
22170 output_operand_lossage ("invalid %%K value");
22172 print_operand_address (file, x);
22173 fputs ("@l", file);
22174 return;
22176 /* %l is output_asm_label. */
22178 case 'L':
22179 /* Write second word of DImode or DFmode reference. Works on register
22180 or non-indexed memory only. */
22181 if (REG_P (x))
22182 fputs (reg_names[REGNO (x) + 1], file);
22183 else if (MEM_P (x))
22185 machine_mode mode = GET_MODE (x);
22186 /* Handle possible auto-increment. Since it is pre-increment and
22187 we have already done it, we can just use an offset of word. */
22188 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22189 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22190 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22191 UNITS_PER_WORD));
22192 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22193 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
22194 UNITS_PER_WORD));
22195 else
22196 output_address (mode, XEXP (adjust_address_nv (x, SImode,
22197 UNITS_PER_WORD),
22198 0));
22200 if (small_data_operand (x, GET_MODE (x)))
22201 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22202 reg_names[SMALL_DATA_REG]);
22204 return;
22206 case 'N':
22207 /* Write the number of elements in the vector times 4. */
22208 if (GET_CODE (x) != PARALLEL)
22209 output_operand_lossage ("invalid %%N value");
22210 else
22211 fprintf (file, "%d", XVECLEN (x, 0) * 4);
22212 return;
22214 case 'O':
22215 /* Similar, but subtract 1 first. */
22216 if (GET_CODE (x) != PARALLEL)
22217 output_operand_lossage ("invalid %%O value");
22218 else
22219 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
22220 return;
22222 case 'p':
22223 /* X is a CONST_INT that is a power of two. Output the logarithm. */
22224 if (! INT_P (x)
22225 || INTVAL (x) < 0
22226 || (i = exact_log2 (INTVAL (x))) < 0)
22227 output_operand_lossage ("invalid %%p value");
22228 else
22229 fprintf (file, "%d", i);
22230 return;
22232 case 'P':
22233 /* The operand must be an indirect memory reference. The result
22234 is the register name. */
22235 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
22236 || REGNO (XEXP (x, 0)) >= 32)
22237 output_operand_lossage ("invalid %%P value");
22238 else
22239 fputs (reg_names[REGNO (XEXP (x, 0))], file);
22240 return;
22242 case 'q':
22243 /* This outputs the logical code corresponding to a boolean
22244 expression. The expression may have one or both operands
22245 negated (if one, only the first one). For condition register
22246 logical operations, it will also treat the negated
22247 CR codes as NOTs, but not handle NOTs of them. */
22249 const char *const *t = 0;
22250 const char *s;
22251 enum rtx_code code = GET_CODE (x);
22252 static const char * const tbl[3][3] = {
22253 { "and", "andc", "nor" },
22254 { "or", "orc", "nand" },
22255 { "xor", "eqv", "xor" } };
22257 if (code == AND)
22258 t = tbl[0];
22259 else if (code == IOR)
22260 t = tbl[1];
22261 else if (code == XOR)
22262 t = tbl[2];
22263 else
22264 output_operand_lossage ("invalid %%q value");
22266 if (GET_CODE (XEXP (x, 0)) != NOT)
22267 s = t[0];
22268 else
22270 if (GET_CODE (XEXP (x, 1)) == NOT)
22271 s = t[2];
22272 else
22273 s = t[1];
22276 fputs (s, file);
22278 return;
22280 case 'Q':
22281 if (! TARGET_MFCRF)
22282 return;
22283 fputc (',', file);
22284 /* FALLTHRU */
22286 case 'R':
22287 /* X is a CR register. Print the mask for `mtcrf'. */
22288 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
22289 output_operand_lossage ("invalid %%R value");
22290 else
22291 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
22292 return;
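/* e.g. %R on CR field 2 prints 32 (128 >> 2), the field mask that makes
   mtcrf update only that one CR field.  */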
22294 case 's':
22295 /* Low 5 bits of 32 - value */
22296 if (! INT_P (x))
22297 output_operand_lossage ("invalid %%s value");
22298 else
22299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
22300 return;
22302 case 't':
22303 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
22304 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
22306 /* Bit 3 is OV bit. */
22307 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
22309 /* If we want bit 31, write a shift count of zero, not 32. */
22310 fprintf (file, "%d", i == 31 ? 0 : i + 1);
22311 return;
22313 case 'T':
22314 /* Print the symbolic name of a branch target register. */
22315 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
22316 && REGNO (x) != CTR_REGNO))
22317 output_operand_lossage ("invalid %%T value");
22318 else if (REGNO (x) == LR_REGNO)
22319 fputs ("lr", file);
22320 else
22321 fputs ("ctr", file);
22322 return;
22324 case 'u':
22325 /* High-order or low-order 16 bits of constant, whichever is non-zero,
22326 for use in unsigned operand. */
22327 if (! INT_P (x))
22329 output_operand_lossage ("invalid %%u value");
22330 return;
22333 uval = INTVAL (x);
22334 if ((uval & 0xffff) == 0)
22335 uval >>= 16;
22337 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
22338 return;
22340 case 'v':
22341 /* High-order 16 bits of constant for use in signed operand. */
22342 if (! INT_P (x))
22343 output_operand_lossage ("invalid %%v value");
22344 else
22345 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
22346 (INTVAL (x) >> 16) & 0xffff);
22347 return;
22349 case 'U':
22350 /* Print `u' if this has an auto-increment or auto-decrement. */
22351 if (MEM_P (x)
22352 && (GET_CODE (XEXP (x, 0)) == PRE_INC
22353 || GET_CODE (XEXP (x, 0)) == PRE_DEC
22354 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
22355 putc ('u', file);
22356 return;
22358 case 'V':
22359 /* Print the trap code for this operand. */
22360 switch (GET_CODE (x))
22362 case EQ:
22363 fputs ("eq", file); /* 4 */
22364 break;
22365 case NE:
22366 fputs ("ne", file); /* 24 */
22367 break;
22368 case LT:
22369 fputs ("lt", file); /* 16 */
22370 break;
22371 case LE:
22372 fputs ("le", file); /* 20 */
22373 break;
22374 case GT:
22375 fputs ("gt", file); /* 8 */
22376 break;
22377 case GE:
22378 fputs ("ge", file); /* 12 */
22379 break;
22380 case LTU:
22381 fputs ("llt", file); /* 2 */
22382 break;
22383 case LEU:
22384 fputs ("lle", file); /* 6 */
22385 break;
22386 case GTU:
22387 fputs ("lgt", file); /* 1 */
22388 break;
22389 case GEU:
22390 fputs ("lge", file); /* 5 */
22391 break;
22392 default:
22393 gcc_unreachable ();
22395 break;
22397 case 'w':
22398 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
22399 normally. */
22400 if (INT_P (x))
22401 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
22402 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
22403 else
22404 print_operand (file, x, 0);
22405 return;
22407 case 'x':
22408 /* X is a FPR or Altivec register used in a VSX context. */
22409 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
22410 output_operand_lossage ("invalid %%x value");
22411 else
22413 int reg = REGNO (x);
22414 int vsx_reg = (FP_REGNO_P (reg)
22415 ? reg - 32
22416 : reg - FIRST_ALTIVEC_REGNO + 32);
22418 #ifdef TARGET_REGNAMES
22419 if (TARGET_REGNAMES)
22420 fprintf (file, "%%vs%d", vsx_reg);
22421 else
22422 #endif
22423 fprintf (file, "%d", vsx_reg);
22425 return;
22427 case 'X':
22428 if (MEM_P (x)
22429 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
22430 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
22431 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
22432 putc ('x', file);
22433 return;
22435 case 'Y':
22436 /* Like 'L', for third word of TImode/PTImode */
22437 if (REG_P (x))
22438 fputs (reg_names[REGNO (x) + 2], file);
22439 else if (MEM_P (x))
22441 machine_mode mode = GET_MODE (x);
22442 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22443 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22444 output_address (mode, plus_constant (Pmode,
22445 XEXP (XEXP (x, 0), 0), 8));
22446 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22447 output_address (mode, plus_constant (Pmode,
22448 XEXP (XEXP (x, 0), 0), 8));
22449 else
22450 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
22451 if (small_data_operand (x, GET_MODE (x)))
22452 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22453 reg_names[SMALL_DATA_REG]);
22455 return;
22457 case 'z':
22458 /* X is a SYMBOL_REF. Write out the name preceded by a
22459 period and without any trailing data in brackets. Used for function
22460 names. If we are configured for System V (or the embedded ABI) on
22461 the PowerPC, do not emit the period, since those systems do not use
22462 TOCs and the like. */
22463 gcc_assert (GET_CODE (x) == SYMBOL_REF);
22465 /* For macho, check to see if we need a stub. */
22466 if (TARGET_MACHO)
22468 const char *name = XSTR (x, 0);
22469 #if TARGET_MACHO
22470 if (darwin_emit_branch_islands
22471 && MACHOPIC_INDIRECT
22472 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
22473 name = machopic_indirection_name (x, /*stub_p=*/true);
22474 #endif
22475 assemble_name (file, name);
22477 else if (!DOT_SYMBOLS)
22478 assemble_name (file, XSTR (x, 0));
22479 else
22480 rs6000_output_function_entry (file, XSTR (x, 0));
22481 return;
22483 case 'Z':
22484 /* Like 'L', for last word of TImode/PTImode. */
22485 if (REG_P (x))
22486 fputs (reg_names[REGNO (x) + 3], file);
22487 else if (MEM_P (x))
22489 machine_mode mode = GET_MODE (x);
22490 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22491 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22492 output_address (mode, plus_constant (Pmode,
22493 XEXP (XEXP (x, 0), 0), 12));
22494 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22495 output_address (mode, plus_constant (Pmode,
22496 XEXP (XEXP (x, 0), 0), 12));
22497 else
22498 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
22499 if (small_data_operand (x, GET_MODE (x)))
22500 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22501 reg_names[SMALL_DATA_REG]);
22503 return;
22505 /* Print AltiVec or SPE memory operand. */
22506 case 'y':
22508 rtx tmp;
22510 gcc_assert (MEM_P (x));
22512 tmp = XEXP (x, 0);
22514 /* Ugly hack because %y is overloaded. */
22515 if ((TARGET_SPE || TARGET_E500_DOUBLE)
22516 && (GET_MODE_SIZE (GET_MODE (x)) == 8
22517 || FLOAT128_2REG_P (GET_MODE (x))
22518 || GET_MODE (x) == TImode
22519 || GET_MODE (x) == PTImode))
22521 /* Handle [reg]. */
22522 if (REG_P (tmp))
22524 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
22525 break;
22527 /* Handle [reg+UIMM]. */
22528 else if (GET_CODE (tmp) == PLUS
22529 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
22531 int offset;
22533 gcc_assert (REG_P (XEXP (tmp, 0)));
22535 offset = INTVAL (XEXP (tmp, 1));
22536 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
22537 break;
22540 /* Fall through. Must be [reg+reg]. */
22542 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
22543 && GET_CODE (tmp) == AND
22544 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
22545 && INTVAL (XEXP (tmp, 1)) == -16)
22546 tmp = XEXP (tmp, 0);
22547 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
22548 && GET_CODE (tmp) == PRE_MODIFY)
22549 tmp = XEXP (tmp, 1);
22550 if (REG_P (tmp))
22551 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
22552 else
22554 if (GET_CODE (tmp) != PLUS
22555 || !REG_P (XEXP (tmp, 0))
22556 || !REG_P (XEXP (tmp, 1)))
22558 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
22559 break;
22562 if (REGNO (XEXP (tmp, 0)) == 0)
22563 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
22564 reg_names[ REGNO (XEXP (tmp, 0)) ]);
22565 else
22566 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
22567 reg_names[ REGNO (XEXP (tmp, 1)) ]);
22569 break;
22572 case 0:
22573 if (REG_P (x))
22574 fprintf (file, "%s", reg_names[REGNO (x)]);
22575 else if (MEM_P (x))
22577 /* We need to handle PRE_INC and PRE_DEC here, since we need to
22578 know the width from the mode. */
22579 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
22580 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
22581 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22582 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
22583 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
22584 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22585 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22586 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
22587 else
22588 output_address (GET_MODE (x), XEXP (x, 0));
22590 else
22592 if (toc_relative_expr_p (x, false))
22593 /* This hack along with a corresponding hack in
22594 rs6000_output_addr_const_extra arranges to output addends
22595 where the assembler expects to find them. eg.
22596 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
22597 without this hack would be output as "x@toc+4". We
22598 want "x+4@toc". */
22599 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22600 else
22601 output_addr_const (file, x);
22603 return;
22605 case '&':
22606 if (const char *name = get_some_local_dynamic_name ())
22607 assemble_name (file, name);
22608 else
22609 output_operand_lossage ("'%%&' used without any "
22610 "local dynamic TLS references");
22611 return;
22613 default:
22614 output_operand_lossage ("invalid %%xn code");
22618 /* Print the address of an operand. */
22620 void
22621 print_operand_address (FILE *file, rtx x)
22623 if (REG_P (x))
22624 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
22625 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
22626 || GET_CODE (x) == LABEL_REF)
22628 output_addr_const (file, x);
22629 if (small_data_operand (x, GET_MODE (x)))
22630 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22631 reg_names[SMALL_DATA_REG]);
22632 else
22633 gcc_assert (!TARGET_TOC);
22635 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22636 && REG_P (XEXP (x, 1)))
22638 if (REGNO (XEXP (x, 0)) == 0)
22639 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
22640 reg_names[ REGNO (XEXP (x, 0)) ]);
22641 else
22642 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
22643 reg_names[ REGNO (XEXP (x, 1)) ]);
22645 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22646 && GET_CODE (XEXP (x, 1)) == CONST_INT)
22647 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
22648 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
22649 #if TARGET_MACHO
22650 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22651 && CONSTANT_P (XEXP (x, 1)))
22653 fprintf (file, "lo16(");
22654 output_addr_const (file, XEXP (x, 1));
22655 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22657 #endif
22658 #if TARGET_ELF
22659 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22660 && CONSTANT_P (XEXP (x, 1)))
22662 output_addr_const (file, XEXP (x, 1));
22663 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22665 #endif
22666 else if (toc_relative_expr_p (x, false))
22668 /* This hack along with a corresponding hack in
22669 rs6000_output_addr_const_extra arranges to output addends
22670 where the assembler expects to find them. eg.
22671 (lo_sum (reg 9)
22672 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
22673 without this hack would be output as "x@toc+8@l(9)". We
22674 want "x+8@toc@l(9)". */
22675 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22676 if (GET_CODE (x) == LO_SUM)
22677 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
22678 else
22679 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
22681 else
22682 gcc_unreachable ();
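/* Rough examples of the forms above: (reg 5) prints as "0(5)";
   (plus (reg 5) (const_int 16)) prints as "16(5)"; and an indexed address
   whose first register is r0 prints the two registers swapped, since r0 in
   an RA field reads as the literal value zero.  */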
22685 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
22687 static bool
22688 rs6000_output_addr_const_extra (FILE *file, rtx x)
22690 if (GET_CODE (x) == UNSPEC)
22691 switch (XINT (x, 1))
22693 case UNSPEC_TOCREL:
22694 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
22695 && REG_P (XVECEXP (x, 0, 1))
22696 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
22697 output_addr_const (file, XVECEXP (x, 0, 0));
22698 if (x == tocrel_base && tocrel_offset != const0_rtx)
22700 if (INTVAL (tocrel_offset) >= 0)
22701 fprintf (file, "+");
22702 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
22704 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
22706 putc ('-', file);
22707 assemble_name (file, toc_label_name);
22708 need_toc_init = 1;
22710 else if (TARGET_ELF)
22711 fputs ("@toc", file);
22712 return true;
22714 #if TARGET_MACHO
22715 case UNSPEC_MACHOPIC_OFFSET:
22716 output_addr_const (file, XVECEXP (x, 0, 0));
22717 putc ('-', file);
22718 machopic_output_function_base_name (file);
22719 return true;
22720 #endif
22722 return false;
22725 /* Target hook for assembling integer objects. The PowerPC version has
22726 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
22727 is defined. It also needs to handle DI-mode objects on 64-bit
22728 targets. */
22730 static bool
22731 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
22733 #ifdef RELOCATABLE_NEEDS_FIXUP
22734 /* Special handling for SI values. */
22735 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
22737 static int recurse = 0;
22739 /* For -mrelocatable, we mark all addresses that need to be fixed up in
22740 the .fixup section. Since the TOC section is already relocated, we
22741 don't need to mark it here. We used to skip the text section, but it
22742 should never be valid for relocated addresses to be placed in the text
22743 section. */
22744 if (DEFAULT_ABI == ABI_V4
22745 && (TARGET_RELOCATABLE || flag_pic > 1)
22746 && in_section != toc_section
22747 && !recurse
22748 && !CONST_SCALAR_INT_P (x)
22749 && CONSTANT_P (x))
22751 char buf[256];
22753 recurse = 1;
22754 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
22755 fixuplabelno++;
22756 ASM_OUTPUT_LABEL (asm_out_file, buf);
22757 fprintf (asm_out_file, "\t.long\t(");
22758 output_addr_const (asm_out_file, x);
22759 fprintf (asm_out_file, ")@fixup\n");
22760 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
22761 ASM_OUTPUT_ALIGN (asm_out_file, 2);
22762 fprintf (asm_out_file, "\t.long\t");
22763 assemble_name (asm_out_file, buf);
22764 fprintf (asm_out_file, "\n\t.previous\n");
22765 recurse = 0;
22766 return true;
22768 /* Remove initial .'s to turn a -mcall-aixdesc function
22769 address into the address of the descriptor, not the function
22770 itself. */
22771 else if (GET_CODE (x) == SYMBOL_REF
22772 && XSTR (x, 0)[0] == '.'
22773 && DEFAULT_ABI == ABI_AIX)
22775 const char *name = XSTR (x, 0);
22776 while (*name == '.')
22777 name++;
22779 fprintf (asm_out_file, "\t.long\t%s\n", name);
22780 return true;
22783 #endif /* RELOCATABLE_NEEDS_FIXUP */
22784 return default_assemble_integer (x, size, aligned_p);
22787 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22788 /* Emit an assembler directive to set symbol visibility for DECL to
22789 VISIBILITY_TYPE. */
22791 static void
22792 rs6000_assemble_visibility (tree decl, int vis)
22794 if (TARGET_XCOFF)
22795 return;
22797 /* Functions need to have their entry point symbol visibility set as
22798 well as their descriptor symbol visibility. */
22799 if (DEFAULT_ABI == ABI_AIX
22800 && DOT_SYMBOLS
22801 && TREE_CODE (decl) == FUNCTION_DECL)
22803 static const char * const visibility_types[] = {
22804 NULL, "protected", "hidden", "internal"
22807 const char *name, *type;
22809 name = ((* targetm.strip_name_encoding)
22810 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22811 type = visibility_types[vis];
22813 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22814 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22816 else
22817 default_assemble_visibility (decl, vis);
22819 #endif
22821 enum rtx_code
22822 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22824 /* Reversal of FP compares needs care -- an ordered compare
22825 becomes an unordered compare and vice versa. */
22826 if (mode == CCFPmode
22827 && (!flag_finite_math_only
22828 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22829 || code == UNEQ || code == LTGT))
22830 return reverse_condition_maybe_unordered (code);
22831 else
22832 return reverse_condition (code);
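/* For example, reversing GE on a CCFPmode comparison (and not compiling
   with -ffinite-math-only) yields UNLT rather than LT, because
   !(a >= b) must also hold when either operand is a NaN.  */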
22835 /* Generate a compare for CODE. Return a brand-new rtx that
22836 represents the result of the compare. */
22838 static rtx
22839 rs6000_generate_compare (rtx cmp, machine_mode mode)
22841 machine_mode comp_mode;
22842 rtx compare_result;
22843 enum rtx_code code = GET_CODE (cmp);
22844 rtx op0 = XEXP (cmp, 0);
22845 rtx op1 = XEXP (cmp, 1);
22847 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22848 comp_mode = CCmode;
22849 else if (FLOAT_MODE_P (mode))
22850 comp_mode = CCFPmode;
22851 else if (code == GTU || code == LTU
22852 || code == GEU || code == LEU)
22853 comp_mode = CCUNSmode;
22854 else if ((code == EQ || code == NE)
22855 && unsigned_reg_p (op0)
22856 && (unsigned_reg_p (op1)
22857 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22858 /* These are unsigned values; perhaps there will be a later
22859 ordering compare that can be shared with this one. */
22860 comp_mode = CCUNSmode;
22861 else
22862 comp_mode = CCmode;
22864 /* If we have an unsigned compare, make sure we don't have a signed value as
22865 an immediate. */
22866 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22867 && INTVAL (op1) < 0)
22869 op0 = copy_rtx_if_shared (op0);
22870 op1 = force_reg (GET_MODE (op0), op1);
22871 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22874 /* First, the compare. */
22875 compare_result = gen_reg_rtx (comp_mode);
22877 /* E500 FP compare instructions on the GPRs. Yuck! */
22878 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
22879 && FLOAT_MODE_P (mode))
22881 rtx cmp, or_result, compare_result2;
22882 machine_mode op_mode = GET_MODE (op0);
22883 bool reverse_p;
22885 if (op_mode == VOIDmode)
22886 op_mode = GET_MODE (op1);
22888 /* First reverse the condition codes that aren't directly supported. */
22889 switch (code)
22891 case NE:
22892 case UNLT:
22893 case UNLE:
22894 case UNGT:
22895 case UNGE:
22896 code = reverse_condition_maybe_unordered (code);
22897 reverse_p = true;
22898 break;
22900 case EQ:
22901 case LT:
22902 case LE:
22903 case GT:
22904 case GE:
22905 reverse_p = false;
22906 break;
22908 default:
22909 gcc_unreachable ();
22912 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
22913 This explains the following mess. */
22915 switch (code)
22917 case EQ:
22918 switch (op_mode)
22920 case SFmode:
22921 cmp = (flag_finite_math_only && !flag_trapping_math)
22922 ? gen_tstsfeq_gpr (compare_result, op0, op1)
22923 : gen_cmpsfeq_gpr (compare_result, op0, op1);
22924 break;
22926 case DFmode:
22927 cmp = (flag_finite_math_only && !flag_trapping_math)
22928 ? gen_tstdfeq_gpr (compare_result, op0, op1)
22929 : gen_cmpdfeq_gpr (compare_result, op0, op1);
22930 break;
22932 case TFmode:
22933 case IFmode:
22934 case KFmode:
22935 cmp = (flag_finite_math_only && !flag_trapping_math)
22936 ? gen_tsttfeq_gpr (compare_result, op0, op1)
22937 : gen_cmptfeq_gpr (compare_result, op0, op1);
22938 break;
22940 default:
22941 gcc_unreachable ();
22943 break;
22945 case GT:
22946 case GE:
22947 switch (op_mode)
22949 case SFmode:
22950 cmp = (flag_finite_math_only && !flag_trapping_math)
22951 ? gen_tstsfgt_gpr (compare_result, op0, op1)
22952 : gen_cmpsfgt_gpr (compare_result, op0, op1);
22953 break;
22955 case DFmode:
22956 cmp = (flag_finite_math_only && !flag_trapping_math)
22957 ? gen_tstdfgt_gpr (compare_result, op0, op1)
22958 : gen_cmpdfgt_gpr (compare_result, op0, op1);
22959 break;
22961 case TFmode:
22962 case IFmode:
22963 case KFmode:
22964 cmp = (flag_finite_math_only && !flag_trapping_math)
22965 ? gen_tsttfgt_gpr (compare_result, op0, op1)
22966 : gen_cmptfgt_gpr (compare_result, op0, op1);
22967 break;
22969 default:
22970 gcc_unreachable ();
22972 break;
22974 case LT:
22975 case LE:
22976 switch (op_mode)
22978 case SFmode:
22979 cmp = (flag_finite_math_only && !flag_trapping_math)
22980 ? gen_tstsflt_gpr (compare_result, op0, op1)
22981 : gen_cmpsflt_gpr (compare_result, op0, op1);
22982 break;
22984 case DFmode:
22985 cmp = (flag_finite_math_only && !flag_trapping_math)
22986 ? gen_tstdflt_gpr (compare_result, op0, op1)
22987 : gen_cmpdflt_gpr (compare_result, op0, op1);
22988 break;
22990 case TFmode:
22991 case IFmode:
22992 case KFmode:
22993 cmp = (flag_finite_math_only && !flag_trapping_math)
22994 ? gen_tsttflt_gpr (compare_result, op0, op1)
22995 : gen_cmptflt_gpr (compare_result, op0, op1);
22996 break;
22998 default:
22999 gcc_unreachable ();
23001 break;
23003 default:
23004 gcc_unreachable ();
23007 /* Synthesize LE and GE from LT/GT || EQ. */
23008 if (code == LE || code == GE)
23010 emit_insn (cmp);
23012 compare_result2 = gen_reg_rtx (CCFPmode);
23014 /* Do the EQ. */
23015 switch (op_mode)
23017 case SFmode:
23018 cmp = (flag_finite_math_only && !flag_trapping_math)
23019 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
23020 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
23021 break;
23023 case DFmode:
23024 cmp = (flag_finite_math_only && !flag_trapping_math)
23025 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
23026 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
23027 break;
23029 case TFmode:
23030 case IFmode:
23031 case KFmode:
23032 cmp = (flag_finite_math_only && !flag_trapping_math)
23033 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
23034 : gen_cmptfeq_gpr (compare_result2, op0, op1);
23035 break;
23037 default:
23038 gcc_unreachable ();
23041 emit_insn (cmp);
23043 /* OR them together. */
23044 or_result = gen_reg_rtx (CCFPmode);
23045 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
23046 compare_result2);
23047 compare_result = or_result;
23050 code = reverse_p ? NE : EQ;
23052 emit_insn (cmp);
23055 /* IEEE 128-bit support in VSX registers when we do not have hardware
23056 support. */
23057 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
23059 rtx libfunc = NULL_RTX;
23060 bool check_nan = false;
23061 rtx dest;
23063 switch (code)
23065 case EQ:
23066 case NE:
23067 libfunc = optab_libfunc (eq_optab, mode);
23068 break;
23070 case GT:
23071 case GE:
23072 libfunc = optab_libfunc (ge_optab, mode);
23073 break;
23075 case LT:
23076 case LE:
23077 libfunc = optab_libfunc (le_optab, mode);
23078 break;
23080 case UNORDERED:
23081 case ORDERED:
23082 libfunc = optab_libfunc (unord_optab, mode);
23083 code = (code == UNORDERED) ? NE : EQ;
23084 break;
23086 case UNGE:
23087 case UNGT:
23088 check_nan = true;
23089 libfunc = optab_libfunc (ge_optab, mode);
23090 code = (code == UNGE) ? GE : GT;
23091 break;
23093 case UNLE:
23094 case UNLT:
23095 check_nan = true;
23096 libfunc = optab_libfunc (le_optab, mode);
23097 code = (code == UNLE) ? LE : LT;
23098 break;
23100 case UNEQ:
23101 case LTGT:
23102 check_nan = true;
23103 libfunc = optab_libfunc (eq_optab, mode);
23104 code = (code == UNEQ) ? EQ : NE;
23105 break;
23107 default:
23108 gcc_unreachable ();
23111 gcc_assert (libfunc);
23113 if (!check_nan)
23114 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23115 SImode, 2, op0, mode, op1, mode);
23117 /* The library signals an exception for signalling NaNs, so we need to
23118 handle isgreater, etc. by first checking isordered. */
23119 else
23121 rtx ne_rtx, normal_dest, unord_dest;
23122 rtx unord_func = optab_libfunc (unord_optab, mode);
23123 rtx join_label = gen_label_rtx ();
23124 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
23125 rtx unord_cmp = gen_reg_rtx (comp_mode);
23128 /* Test for either value being a NaN. */
23129 gcc_assert (unord_func);
23130 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
23131 SImode, 2, op0, mode, op1,
23132 mode);
23134 /* Set value (1) if either value is a NaN, and jump to the join
23135 label. */
23136 dest = gen_reg_rtx (SImode);
23137 emit_move_insn (dest, const1_rtx);
23138 emit_insn (gen_rtx_SET (unord_cmp,
23139 gen_rtx_COMPARE (comp_mode, unord_dest,
23140 const0_rtx)));
23142 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
23143 emit_jump_insn (gen_rtx_SET (pc_rtx,
23144 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
23145 join_ref,
23146 pc_rtx)));
23148 /* Do the normal comparison, knowing that the values are not
23149 NaNs. */
23150 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
23151 SImode, 2, op0, mode, op1,
23152 mode);
23154 emit_insn (gen_cstoresi4 (dest,
23155 gen_rtx_fmt_ee (code, SImode, normal_dest,
23156 const0_rtx),
23157 normal_dest, const0_rtx));
23159 /* Join NaN and non-NaN paths. Compare dest against 0. */
23160 emit_label (join_label);
23161 code = NE;
23164 emit_insn (gen_rtx_SET (compare_result,
23165 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
23168 else
23170 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
23171 CLOBBERs to match cmptf_internal2 pattern. */
23172 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
23173 && FLOAT128_IBM_P (GET_MODE (op0))
23174 && TARGET_HARD_FLOAT && TARGET_FPRS)
23175 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23176 gen_rtvec (10,
23177 gen_rtx_SET (compare_result,
23178 gen_rtx_COMPARE (comp_mode, op0, op1)),
23179 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23180 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23181 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23182 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23183 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23184 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23185 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23186 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
23187 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
23188 else if (GET_CODE (op1) == UNSPEC
23189 && XINT (op1, 1) == UNSPEC_SP_TEST)
23191 rtx op1b = XVECEXP (op1, 0, 0);
23192 comp_mode = CCEQmode;
23193 compare_result = gen_reg_rtx (CCEQmode);
23194 if (TARGET_64BIT)
23195 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
23196 else
23197 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
23199 else
23200 emit_insn (gen_rtx_SET (compare_result,
23201 gen_rtx_COMPARE (comp_mode, op0, op1)));
23204 /* Some kinds of FP comparisons need an OR operation;
23205 under flag_finite_math_only we don't bother. */
23206 if (FLOAT_MODE_P (mode)
23207 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
23208 && !flag_finite_math_only
23209 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
23210 && (code == LE || code == GE
23211 || code == UNEQ || code == LTGT
23212 || code == UNGT || code == UNLT))
23214 enum rtx_code or1, or2;
23215 rtx or1_rtx, or2_rtx, compare2_rtx;
23216 rtx or_result = gen_reg_rtx (CCEQmode);
23218 switch (code)
23220 case LE: or1 = LT; or2 = EQ; break;
23221 case GE: or1 = GT; or2 = EQ; break;
23222 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
23223 case LTGT: or1 = LT; or2 = GT; break;
23224 case UNGT: or1 = UNORDERED; or2 = GT; break;
23225 case UNLT: or1 = UNORDERED; or2 = LT; break;
23226 default: gcc_unreachable ();
23228 validate_condition_mode (or1, comp_mode);
23229 validate_condition_mode (or2, comp_mode);
23230 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
23231 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
23232 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
23233 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
23234 const_true_rtx);
23235 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
23237 compare_result = or_result;
23238 code = EQ;
23241 validate_condition_mode (code, GET_MODE (compare_result));
23243 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
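/* Concrete case of the OR step above: a floating-point LE is synthesized
   as (LT || EQ) via the CCEQmode IOR compare, and the condition handed back
   to the caller is an EQ test of that CCEQ result against zero.  */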
23247 /* Return the diagnostic message string if the binary operation OP is
23248 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23250 static const char*
23251 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
23252 const_tree type1,
23253 const_tree type2)
23255 machine_mode mode1 = TYPE_MODE (type1);
23256 machine_mode mode2 = TYPE_MODE (type2);
23258 /* For complex modes, use the inner type. */
23259 if (COMPLEX_MODE_P (mode1))
23260 mode1 = GET_MODE_INNER (mode1);
23262 if (COMPLEX_MODE_P (mode2))
23263 mode2 = GET_MODE_INNER (mode2);
23265 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
23266 double to intermix unless -mfloat128-convert. */
23267 if (mode1 == mode2)
23268 return NULL;
23270 if (!TARGET_FLOAT128_CVT)
23272 if ((mode1 == KFmode && mode2 == IFmode)
23273 || (mode1 == IFmode && mode2 == KFmode))
23274 return N_("__float128 and __ibm128 cannot be used in the same "
23275 "expression");
23277 if (TARGET_IEEEQUAD
23278 && ((mode1 == IFmode && mode2 == TFmode)
23279 || (mode1 == TFmode && mode2 == IFmode)))
23280 return N_("__ibm128 and long double cannot be used in the same "
23281 "expression");
23283 if (!TARGET_IEEEQUAD
23284 && ((mode1 == KFmode && mode2 == TFmode)
23285 || (mode1 == TFmode && mode2 == KFmode)))
23286 return N_("__float128 and long double cannot be used in the same "
23287 "expression");
23290 return NULL;
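/* For example, without -mfloat128-convert the front ends reject

     __float128 a;
     __ibm128 b;
     ... a + b ...

   with the first message above, since KFmode (binary128) and IFmode (a
   pair of doubles) encode 128-bit values incompatibly.  */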
23294 /* Expand floating point conversion to/from __float128 and __ibm128. */
23296 void
23297 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
23299 machine_mode dest_mode = GET_MODE (dest);
23300 machine_mode src_mode = GET_MODE (src);
23301 convert_optab cvt = unknown_optab;
23302 bool do_move = false;
23303 rtx libfunc = NULL_RTX;
23304 rtx dest2;
23305 typedef rtx (*rtx_2func_t) (rtx, rtx);
23306 rtx_2func_t hw_convert = (rtx_2func_t)0;
23307 size_t kf_or_tf;
23309 struct hw_conv_t {
23310 rtx_2func_t from_df;
23311 rtx_2func_t from_sf;
23312 rtx_2func_t from_si_sign;
23313 rtx_2func_t from_si_uns;
23314 rtx_2func_t from_di_sign;
23315 rtx_2func_t from_di_uns;
23316 rtx_2func_t to_df;
23317 rtx_2func_t to_sf;
23318 rtx_2func_t to_si_sign;
23319 rtx_2func_t to_si_uns;
23320 rtx_2func_t to_di_sign;
23321 rtx_2func_t to_di_uns;
23322 } hw_conversions[2] = {
23323 /* Conversions to/from KFmode. */
23325 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
23326 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
23327 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
23328 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
23329 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
23330 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
23331 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
23332 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
23333 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
23334 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
23335 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
23336 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
23339 /* Conversions to/from TFmode. */
23341 gen_extenddftf2_hw, /* TFmode <- DFmode. */
23342 gen_extendsftf2_hw, /* TFmode <- SFmode. */
23343 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
23344 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
23345 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
23346 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
23347 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
23348 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
23349 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
23350 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
23351 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
23352 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
23356 if (dest_mode == src_mode)
23357 gcc_unreachable ();
23359 /* Eliminate memory operations. */
23360 if (MEM_P (src))
23361 src = force_reg (src_mode, src);
23363 if (MEM_P (dest))
23365 rtx tmp = gen_reg_rtx (dest_mode);
23366 rs6000_expand_float128_convert (tmp, src, unsigned_p);
23367 rs6000_emit_move (dest, tmp, dest_mode);
23368 return;
23371 /* Convert to IEEE 128-bit floating point. */
23372 if (FLOAT128_IEEE_P (dest_mode))
23374 if (dest_mode == KFmode)
23375 kf_or_tf = 0;
23376 else if (dest_mode == TFmode)
23377 kf_or_tf = 1;
23378 else
23379 gcc_unreachable ();
23381 switch (src_mode)
23383 case DFmode:
23384 cvt = sext_optab;
23385 hw_convert = hw_conversions[kf_or_tf].from_df;
23386 break;
23388 case SFmode:
23389 cvt = sext_optab;
23390 hw_convert = hw_conversions[kf_or_tf].from_sf;
23391 break;
23393 case KFmode:
23394 case IFmode:
23395 case TFmode:
23396 if (FLOAT128_IBM_P (src_mode))
23397 cvt = sext_optab;
23398 else
23399 do_move = true;
23400 break;
23402 case SImode:
23403 if (unsigned_p)
23405 cvt = ufloat_optab;
23406 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
23408 else
23410 cvt = sfloat_optab;
23411 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
23413 break;
23415 case DImode:
23416 if (unsigned_p)
23418 cvt = ufloat_optab;
23419 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
23421 else
23423 cvt = sfloat_optab;
23424 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
23426 break;
23428 default:
23429 gcc_unreachable ();
23433 /* Convert from IEEE 128-bit floating point. */
23434 else if (FLOAT128_IEEE_P (src_mode))
23436 if (src_mode == KFmode)
23437 kf_or_tf = 0;
23438 else if (src_mode == TFmode)
23439 kf_or_tf = 1;
23440 else
23441 gcc_unreachable ();
23443 switch (dest_mode)
23445 case DFmode:
23446 cvt = trunc_optab;
23447 hw_convert = hw_conversions[kf_or_tf].to_df;
23448 break;
23450 case SFmode:
23451 cvt = trunc_optab;
23452 hw_convert = hw_conversions[kf_or_tf].to_sf;
23453 break;
23455 case KFmode:
23456 case IFmode:
23457 case TFmode:
23458 if (FLOAT128_IBM_P (dest_mode))
23459 cvt = trunc_optab;
23460 else
23461 do_move = true;
23462 break;
23464 case SImode:
23465 if (unsigned_p)
23467 cvt = ufix_optab;
23468 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
23470 else
23472 cvt = sfix_optab;
23473 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
23475 break;
23477 case DImode:
23478 if (unsigned_p)
23480 cvt = ufix_optab;
23481 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
23483 else
23485 cvt = sfix_optab;
23486 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
23488 break;
23490 default:
23491 gcc_unreachable ();
23495 /* Both IBM format. */
23496 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
23497 do_move = true;
23499 else
23500 gcc_unreachable ();
23502 /* Handle conversion between TFmode/KFmode. */
23503 if (do_move)
23504 emit_move_insn (dest, gen_lowpart (dest_mode, src));
23506 /* Handle conversion if we have hardware support. */
23507 else if (TARGET_FLOAT128_HW && hw_convert)
23508 emit_insn ((hw_convert) (dest, src));
23510 /* Call an external function to do the conversion. */
23511 else if (cvt != unknown_optab)
23513 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
23514 gcc_assert (libfunc != NULL_RTX);
23516 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
23517 src_mode);
23519 gcc_assert (dest2 != NULL_RTX);
23520 if (!rtx_equal_p (dest, dest2))
23521 emit_move_insn (dest, dest2);
23524 else
23525 gcc_unreachable ();
23527 return;
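/* For example, with ISA 3.0 float128 hardware a signed DImode to KFmode
   conversion collapses to a single xscvsdqp once the value sits in a
   vector register, while without -mfloat128-hardware the same conversion
   goes out to the sfloat_optab libcall (typically __floatdikf).  The
   instruction/libcall names here show the usual mapping, for illustration
   only.  */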
23530 /* Split a conversion from __float128 to an integer type into separate insns.
23531 OPERANDS points to the destination, source, and V2DI temporary
23532 register. CODE is either FIX or UNSIGNED_FIX. */
23534 void
23535 convert_float128_to_int (rtx *operands, enum rtx_code code)
23537 rtx dest = operands[0];
23538 rtx src = operands[1];
23539 rtx tmp = operands[2];
23540 rtx cvt;
23541 rtvec cvt_vec;
23542 rtx cvt_unspec;
23543 rtvec move_vec;
23544 rtx move_unspec;
23546 if (GET_CODE (tmp) == SCRATCH)
23547 tmp = gen_reg_rtx (V2DImode);
23549 if (MEM_P (dest))
23550 dest = rs6000_address_for_fpconvert (dest);
23552 /* Generate the actual convert insn of the form:
23553 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
23554 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
23555 cvt_vec = gen_rtvec (1, cvt);
23556 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23557 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
23559 /* Generate the move insn of the form:
23560 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
23561 move_vec = gen_rtvec (1, tmp);
23562 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
23563 emit_insn (gen_rtx_SET (dest, move_unspec));
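/* The split above typically ends up as machine code of the shape
   (register choices illustrative):

	xscvqpswz v0,v2		# SImode <- KFmode, result in a vector reg
	mfvsrwz   r3,vs32	# move the 32-bit result into a GPR

   which keeps the intermediate value out of memory.  */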
23566 /* Split a conversion from an integer type to __float128 into separate insns.
23567 OPERANDS points to the destination, source, and V2DI temporary
23568 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
23570 void
23571 convert_int_to_float128 (rtx *operands, enum rtx_code code)
23573 rtx dest = operands[0];
23574 rtx src = operands[1];
23575 rtx tmp = operands[2];
23576 rtx cvt;
23577 rtvec cvt_vec;
23578 rtx cvt_unspec;
23579 rtvec move_vec;
23580 rtx move_unspec;
23581 rtx unsigned_flag;
23583 if (GET_CODE (tmp) == SCRATCH)
23584 tmp = gen_reg_rtx (V2DImode);
23586 if (MEM_P (src))
23587 src = rs6000_address_for_fpconvert (src);
23589 /* Generate the move of the integer into the Altivec register of the form:
23590 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
23591 (const_int 0)] UNSPEC_IEEE128_MOVE)).
23593 or:
23594 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
23596 if (GET_MODE (src) == SImode)
23598 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
23599 move_vec = gen_rtvec (2, src, unsigned_flag);
23601 else
23602 move_vec = gen_rtvec (1, src);
23604 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
23605 emit_insn (gen_rtx_SET (tmp, move_unspec));
23607 /* Generate the actual convert insn of the form:
23608 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
23609 UNSPEC_IEEE128_CONVERT))). */
23610 cvt_vec = gen_rtvec (1, tmp);
23611 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23612 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
23613 emit_insn (gen_rtx_SET (dest, cvt));
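/* This direction typically becomes something like (illustrative):

	mtvsrwa  vs34,r3	# sign-extending move of the SImode source
	xscvsdqp v2,v2		# convert the doubleword to KFmode

   with mtvsrwz instead for the unsigned SImode case.  */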
23617 /* Emit the RTL for an sISEL pattern. */
23619 void
23620 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
23622 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
23625 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
23626 can be used as that dest register. Return the dest register. */
23629 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
23631 if (op2 == const0_rtx)
23632 return op1;
23634 if (GET_CODE (scratch) == SCRATCH)
23635 scratch = gen_reg_rtx (mode);
23637 if (logical_operand (op2, mode))
23638 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
23639 else
23640 emit_insn (gen_rtx_SET (scratch,
23641 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
23643 return scratch;
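/* As a worked example, comparing r3 against the logical constant 5 emits
   "xori r0,r3,5", so SCRATCH becomes zero exactly when OP1 == OP2; a
   constant such as 70000 fails logical_operand and instead takes the
   PLUS path, adding -70000.  Register numbers are illustrative.  */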
23646 void
23647 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
23649 rtx condition_rtx;
23650 machine_mode op_mode;
23651 enum rtx_code cond_code;
23652 rtx result = operands[0];
23654 condition_rtx = rs6000_generate_compare (operands[1], mode);
23655 cond_code = GET_CODE (condition_rtx);
23657 if (FLOAT_MODE_P (mode)
23658 && !TARGET_FPRS && TARGET_HARD_FLOAT)
23660 rtx t;
23662 PUT_MODE (condition_rtx, SImode);
23663 t = XEXP (condition_rtx, 0);
23665 gcc_assert (cond_code == NE || cond_code == EQ);
23667 if (cond_code == NE)
23668 emit_insn (gen_e500_flip_gt_bit (t, t));
23670 emit_insn (gen_move_from_CR_gt_bit (result, t));
23671 return;
23674 if (cond_code == NE
23675 || cond_code == GE || cond_code == LE
23676 || cond_code == GEU || cond_code == LEU
23677 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
23679 rtx not_result = gen_reg_rtx (CCEQmode);
23680 rtx not_op, rev_cond_rtx;
23681 machine_mode cc_mode;
23683 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
23685 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
23686 SImode, XEXP (condition_rtx, 0), const0_rtx);
23687 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
23688 emit_insn (gen_rtx_SET (not_result, not_op));
23689 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
23692 op_mode = GET_MODE (XEXP (operands[1], 0));
23693 if (op_mode == VOIDmode)
23694 op_mode = GET_MODE (XEXP (operands[1], 1));
23696 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
23698 PUT_MODE (condition_rtx, DImode);
23699 convert_move (result, condition_rtx, 0);
23701 else
23703 PUT_MODE (condition_rtx, SImode);
23704 emit_insn (gen_rtx_SET (result, condition_rtx));
23708 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison OPERANDS[0] in MODE. */
23710 void
23711 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
23713 rtx condition_rtx, loc_ref;
23715 condition_rtx = rs6000_generate_compare (operands[0], mode);
23716 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
23717 emit_jump_insn (gen_rtx_SET (pc_rtx,
23718 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
23719 loc_ref, pc_rtx)));
23722 /* Return the string to output a conditional branch to LABEL, which is
23723 the operand template of the label, or NULL if the branch is really a
23724 conditional return.
23726 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
23727 condition code register and its mode specifies what kind of
23728 comparison we made.
23730 REVERSED is nonzero if we should reverse the sense of the comparison.
23732 INSN is the insn. */
23734 char *
23735 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
23737 static char string[64];
23738 enum rtx_code code = GET_CODE (op);
23739 rtx cc_reg = XEXP (op, 0);
23740 machine_mode mode = GET_MODE (cc_reg);
23741 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
23742 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
23743 int really_reversed = reversed ^ need_longbranch;
23744 char *s = string;
23745 const char *ccode;
23746 const char *pred;
23747 rtx note;
23749 validate_condition_mode (code, mode);
23751 /* Work out which way this really branches. We could use
23752 reverse_condition_maybe_unordered here always but this
23753 makes the resulting assembler clearer. */
23754 if (really_reversed)
23756 /* Reversal of FP compares takes care -- an ordered compare
23757 becomes an unordered compare and vice versa. */
23758 if (mode == CCFPmode)
23759 code = reverse_condition_maybe_unordered (code);
23760 else
23761 code = reverse_condition (code);
23764 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
23766 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
23767 to the GT bit. */
23768 switch (code)
23770 case EQ:
23771 /* Opposite of GT. */
23772 code = GT;
23773 break;
23775 case NE:
23776 code = UNLE;
23777 break;
23779 default:
23780 gcc_unreachable ();
23784 switch (code)
23786 /* Not all of these are actually distinct opcodes, but
23787 we distinguish them for clarity of the resulting assembler. */
23788 case NE: case LTGT:
23789 ccode = "ne"; break;
23790 case EQ: case UNEQ:
23791 ccode = "eq"; break;
23792 case GE: case GEU:
23793 ccode = "ge"; break;
23794 case GT: case GTU: case UNGT:
23795 ccode = "gt"; break;
23796 case LE: case LEU:
23797 ccode = "le"; break;
23798 case LT: case LTU: case UNLT:
23799 ccode = "lt"; break;
23800 case UNORDERED: ccode = "un"; break;
23801 case ORDERED: ccode = "nu"; break;
23802 case UNGE: ccode = "nl"; break;
23803 case UNLE: ccode = "ng"; break;
23804 default:
23805 gcc_unreachable ();
23808 /* Maybe we have a guess as to how likely the branch is. */
23809 pred = "";
23810 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
23811 if (note != NULL_RTX)
23813 /* PROB is the difference from 50%. */
23814 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
23816 /* Only hint for highly probable/improbable branches on newer cpus when
23817 we have real profile data, as static prediction overrides processor
23818 dynamic prediction. For older cpus we may as well always hint, but
23819 assume not taken for branches that are very close to 50% as a
23820 mispredicted taken branch is more expensive than a
23821 mispredicted not-taken branch. */
23822 if (rs6000_always_hint
23823 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
23824 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
23825 && br_prob_note_reliable_p (note)))
23827 if (abs (prob) > REG_BR_PROB_BASE / 20
23828 && ((prob > 0) ^ need_longbranch))
23829 pred = "+";
23830 else
23831 pred = "-";
23835 if (label == NULL)
23836 s += sprintf (s, "b%slr%s ", ccode, pred);
23837 else
23838 s += sprintf (s, "b%s%s ", ccode, pred);
23840 /* We need to escape any '%' characters in the reg_names string.
23841 Assume they'd only be the first character.... */
23842 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
23843 *s++ = '%';
23844 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
23846 if (label != NULL)
23848 /* If the branch distance was too far, we may have to use an
23849 unconditional branch to go the distance. */
23850 if (need_longbranch)
23851 s += sprintf (s, ",$+8\n\tb %s", label);
23852 else
23853 s += sprintf (s, ",%s", label);
23856 return string;
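/* With the default register names this produces strings such as
   "beq 0,.L5", or "beq+ 0,.L5" when a taken hint applies; a conditional
   return comes out as "beqlr 0"; and a too-distant branch is reversed
   into "bne 0,$+8\n\tb .L5".  */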
23859 /* Return the string to flip the GT bit on a CR. */
23860 char *
23861 output_e500_flip_gt_bit (rtx dst, rtx src)
23863 static char string[64];
23864 int a, b;
23866 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
23867 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
23869 /* GT bit. */
23870 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
23871 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
23873 sprintf (string, "crnot %d,%d", a, b);
23874 return string;
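/* E.g. for dst = cr1 and src = cr2 this computes a = 4*1 + 1 = 5 and
   b = 4*2 + 1 = 9 and returns "crnot 5,9".  */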
23877 /* Return insn for VSX or Altivec comparisons. */
23879 static rtx
23880 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
23882 rtx mask;
23883 machine_mode mode = GET_MODE (op0);
23885 switch (code)
23887 default:
23888 break;
23890 case GE:
23891 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23892 return NULL_RTX;
23893 /* FALLTHRU */
23895 case EQ:
23896 case GT:
23897 case GTU:
23898 case ORDERED:
23899 case UNORDERED:
23900 case UNEQ:
23901 case LTGT:
23902 mask = gen_reg_rtx (mode);
23903 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
23904 return mask;
23907 return NULL_RTX;
23910 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
23911 DMODE is expected destination mode. This is a recursive function. */
23913 static rtx
23914 rs6000_emit_vector_compare (enum rtx_code rcode,
23915 rtx op0, rtx op1,
23916 machine_mode dmode)
23918 rtx mask;
23919 bool swap_operands = false;
23920 bool try_again = false;
23922 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
23923 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
23925 /* See if the comparison works as is. */
23926 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23927 if (mask)
23928 return mask;
23930 switch (rcode)
23932 case LT:
23933 rcode = GT;
23934 swap_operands = true;
23935 try_again = true;
23936 break;
23937 case LTU:
23938 rcode = GTU;
23939 swap_operands = true;
23940 try_again = true;
23941 break;
23942 case NE:
23943 case UNLE:
23944 case UNLT:
23945 case UNGE:
23946 case UNGT:
23947 /* Invert condition and try again.
23948 e.g., A != B becomes ~(A==B). */
23950 enum rtx_code rev_code;
23951 enum insn_code nor_code;
23952 rtx mask2;
23954 rev_code = reverse_condition_maybe_unordered (rcode);
23955 if (rev_code == UNKNOWN)
23956 return NULL_RTX;
23958 nor_code = optab_handler (one_cmpl_optab, dmode);
23959 if (nor_code == CODE_FOR_nothing)
23960 return NULL_RTX;
23962 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
23963 if (!mask2)
23964 return NULL_RTX;
23966 mask = gen_reg_rtx (dmode);
23967 emit_insn (GEN_FCN (nor_code) (mask, mask2));
23968 return mask;
23970 break;
23971 case GE:
23972 case GEU:
23973 case LE:
23974 case LEU:
23975 /* Try GT/GTU/LT/LTU OR EQ. */
23977 rtx c_rtx, eq_rtx;
23978 enum insn_code ior_code;
23979 enum rtx_code new_code;
23981 switch (rcode)
23983 case GE:
23984 new_code = GT;
23985 break;
23987 case GEU:
23988 new_code = GTU;
23989 break;
23991 case LE:
23992 new_code = LT;
23993 break;
23995 case LEU:
23996 new_code = LTU;
23997 break;
23999 default:
24000 gcc_unreachable ();
24003 ior_code = optab_handler (ior_optab, dmode);
24004 if (ior_code == CODE_FOR_nothing)
24005 return NULL_RTX;
24007 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
24008 if (!c_rtx)
24009 return NULL_RTX;
24011 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
24012 if (!eq_rtx)
24013 return NULL_RTX;
24015 mask = gen_reg_rtx (dmode);
24016 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
24017 return mask;
24019 break;
24020 default:
24021 return NULL_RTX;
24024 if (try_again)
24026 if (swap_operands)
24027 std::swap (op0, op1);
24029 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
24030 if (mask)
24031 return mask;
24034 /* You only get two chances. */
24035 return NULL_RTX;
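/* As an illustration, V4SI "a <= b" has no single instruction, so it is
   built as (a < b) | (a == b), and the LT leg is itself retried as GT
   with the operands swapped, giving roughly:

	vcmpgtsw t1,b,a		# a < b  rewritten as  b > a
	vcmpequw t2,a,b
	vor	 mask,t1,t2  */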
24038 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
24039 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
24040 operands for the relation operation COND. */
24043 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
24044 rtx cond, rtx cc_op0, rtx cc_op1)
24046 machine_mode dest_mode = GET_MODE (dest);
24047 machine_mode mask_mode = GET_MODE (cc_op0);
24048 enum rtx_code rcode = GET_CODE (cond);
24049 machine_mode cc_mode = CCmode;
24050 rtx mask;
24051 rtx cond2;
24052 rtx tmp;
24053 bool invert_move = false;
24055 if (VECTOR_UNIT_NONE_P (dest_mode))
24056 return 0;
24058 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
24059 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
24061 switch (rcode)
24063 /* Swap operands if we can, and fall back to doing the operation as
24064 specified, and doing a NOR to invert the test. */
24065 case NE:
24066 case UNLE:
24067 case UNLT:
24068 case UNGE:
24069 case UNGT:
24070 /* Invert condition and try again.
24071 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
24072 invert_move = true;
24073 rcode = reverse_condition_maybe_unordered (rcode);
24074 if (rcode == UNKNOWN)
24075 return 0;
24076 break;
24078 /* Mark unsigned tests with CCUNSmode. */
24079 case GTU:
24080 case GEU:
24081 case LTU:
24082 case LEU:
24083 cc_mode = CCUNSmode;
24084 break;
24086 default:
24087 break;
24090 /* Get the vector mask for the given relational operations. */
24091 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
24093 if (!mask)
24094 return 0;
24096 if (invert_move)
24098 tmp = op_true;
24099 op_true = op_false;
24100 op_false = tmp;
24103 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
24104 CONST0_RTX (dest_mode));
24105 emit_insn (gen_rtx_SET (dest,
24106 gen_rtx_IF_THEN_ELSE (dest_mode,
24107 cond2,
24108 op_true,
24109 op_false)));
24110 return 1;
24113 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
24114 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
24115 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
24116 hardware has no such operation. */
24118 static int
24119 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24121 enum rtx_code code = GET_CODE (op);
24122 rtx op0 = XEXP (op, 0);
24123 rtx op1 = XEXP (op, 1);
24124 machine_mode compare_mode = GET_MODE (op0);
24125 machine_mode result_mode = GET_MODE (dest);
24126 bool max_p = false;
24128 if (result_mode != compare_mode)
24129 return 0;
24131 if (code == GE || code == GT)
24132 max_p = true;
24133 else if (code == LE || code == LT)
24134 max_p = false;
24135 else
24136 return 0;
24138 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
24141 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
24142 max_p = !max_p;
24144 else
24145 return 0;
24147 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
24148 return 1;
24151 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
24152 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
24153 operands of the last comparison is nonzero/true, FALSE_COND if it is
24154 zero/false. Return 0 if the hardware has no such operation. */
24156 static int
24157 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24159 enum rtx_code code = GET_CODE (op);
24160 rtx op0 = XEXP (op, 0);
24161 rtx op1 = XEXP (op, 1);
24162 machine_mode result_mode = GET_MODE (dest);
24163 rtx compare_rtx;
24164 rtx cmove_rtx;
24165 rtx clobber_rtx;
24167 if (!can_create_pseudo_p ())
24168 return 0;
24170 switch (code)
24172 case EQ:
24173 case GE:
24174 case GT:
24175 break;
24177 case NE:
24178 case LT:
24179 case LE:
24180 code = swap_condition (code);
24181 std::swap (op0, op1);
24182 break;
24184 default:
24185 return 0;
24188 /* Generate: [(parallel [(set (dest)
24189 (if_then_else (op (cmp1) (cmp2))
24190 (true)
24191 (false)))
24192 (clobber (scratch))])]. */
24194 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
24195 cmove_rtx = gen_rtx_SET (dest,
24196 gen_rtx_IF_THEN_ELSE (result_mode,
24197 compare_rtx,
24198 true_cond,
24199 false_cond));
24201 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
24202 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24203 gen_rtvec (2, cmove_rtx, clobber_rtx)));
24205 return 1;
24208 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
24209 operands of the last comparison is nonzero/true, FALSE_COND if it
24210 is zero/false. Return 0 if the hardware has no such operation. */
24213 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24215 enum rtx_code code = GET_CODE (op);
24216 rtx op0 = XEXP (op, 0);
24217 rtx op1 = XEXP (op, 1);
24218 machine_mode compare_mode = GET_MODE (op0);
24219 machine_mode result_mode = GET_MODE (dest);
24220 rtx temp;
24221 bool is_against_zero;
24223 /* These modes should always match. */
24224 if (GET_MODE (op1) != compare_mode
24225 /* In the isel case, however, we can use a compare immediate, so
24226 op1 may be a small constant. */
24227 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
24228 return 0;
24229 if (GET_MODE (true_cond) != result_mode)
24230 return 0;
24231 if (GET_MODE (false_cond) != result_mode)
24232 return 0;
24234 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
24235 if (TARGET_P9_MINMAX
24236 && (compare_mode == SFmode || compare_mode == DFmode)
24237 && (result_mode == SFmode || result_mode == DFmode))
24239 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
24240 return 1;
24242 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
24243 return 1;
24246 /* Don't allow using floating point comparisons for integer results for
24247 now. */
24248 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
24249 return 0;
24251 /* First, work out if the hardware can do this at all, or
24252 if it's too slow.... */
24253 if (!FLOAT_MODE_P (compare_mode))
24255 if (TARGET_ISEL)
24256 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
24257 return 0;
24259 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
24260 && SCALAR_FLOAT_MODE_P (compare_mode))
24261 return 0;
24263 is_against_zero = op1 == CONST0_RTX (compare_mode);
24265 /* A floating-point subtract might overflow, underflow, or produce
24266 an inexact result, thus changing the floating-point flags, so it
24267 can't be generated if we care about that. It's safe if one side
24268 of the construct is zero, since then no subtract will be
24269 generated. */
24270 if (SCALAR_FLOAT_MODE_P (compare_mode)
24271 && flag_trapping_math && ! is_against_zero)
24272 return 0;
24274 /* Eliminate half of the comparisons by switching operands; this
24275 makes the remaining code simpler. */
24276 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
24277 || code == LTGT || code == LT || code == UNLE)
24279 code = reverse_condition_maybe_unordered (code);
24280 temp = true_cond;
24281 true_cond = false_cond;
24282 false_cond = temp;
24285 /* UNEQ and LTGT take four instructions for a comparison with zero,
24286 so it'll probably be faster to use a branch here too. */
24287 if (code == UNEQ && HONOR_NANS (compare_mode))
24288 return 0;
24290 /* We're going to try to implement comparisons by performing
24291 a subtract, then comparing against zero. Unfortunately,
24292 Inf - Inf is NaN which is not zero, and so if we don't
24293 know that the operand is finite and the comparison
24294 would treat EQ differently from UNORDERED, we can't do it. */
24295 if (HONOR_INFINITIES (compare_mode)
24296 && code != GT && code != UNGE
24297 && (GET_CODE (op1) != CONST_DOUBLE
24298 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
24299 /* Constructs of the form (a OP b ? a : b) are safe. */
24300 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
24301 || (! rtx_equal_p (op0, true_cond)
24302 && ! rtx_equal_p (op1, true_cond))))
24303 return 0;
24305 /* At this point we know we can use fsel. */
24307 /* Reduce the comparison to a comparison against zero. */
24308 if (! is_against_zero)
24310 temp = gen_reg_rtx (compare_mode);
24311 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
24312 op0 = temp;
24313 op1 = CONST0_RTX (compare_mode);
24316 /* If we don't care about NaNs we can reduce some of the comparisons
24317 down to faster ones. */
24318 if (! HONOR_NANS (compare_mode))
24319 switch (code)
24321 case GT:
24322 code = LE;
24323 temp = true_cond;
24324 true_cond = false_cond;
24325 false_cond = temp;
24326 break;
24327 case UNGE:
24328 code = GE;
24329 break;
24330 case UNEQ:
24331 code = EQ;
24332 break;
24333 default:
24334 break;
24337 /* Now, reduce everything down to a GE. */
24338 switch (code)
24340 case GE:
24341 break;
24343 case LE:
24344 temp = gen_reg_rtx (compare_mode);
24345 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24346 op0 = temp;
24347 break;
24349 case ORDERED:
24350 temp = gen_reg_rtx (compare_mode);
24351 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
24352 op0 = temp;
24353 break;
24355 case EQ:
24356 temp = gen_reg_rtx (compare_mode);
24357 emit_insn (gen_rtx_SET (temp,
24358 gen_rtx_NEG (compare_mode,
24359 gen_rtx_ABS (compare_mode, op0))));
24360 op0 = temp;
24361 break;
24363 case UNGE:
24364 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
24365 temp = gen_reg_rtx (result_mode);
24366 emit_insn (gen_rtx_SET (temp,
24367 gen_rtx_IF_THEN_ELSE (result_mode,
24368 gen_rtx_GE (VOIDmode,
24369 op0, op1),
24370 true_cond, false_cond)));
24371 false_cond = true_cond;
24372 true_cond = temp;
24374 temp = gen_reg_rtx (compare_mode);
24375 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24376 op0 = temp;
24377 break;
24379 case GT:
24380 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
24381 temp = gen_reg_rtx (result_mode);
24382 emit_insn (gen_rtx_SET (temp,
24383 gen_rtx_IF_THEN_ELSE (result_mode,
24384 gen_rtx_GE (VOIDmode,
24385 op0, op1),
24386 true_cond, false_cond)));
24387 true_cond = false_cond;
24388 false_cond = temp;
24390 temp = gen_reg_rtx (compare_mode);
24391 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24392 op0 = temp;
24393 break;
24395 default:
24396 gcc_unreachable ();
24399 emit_insn (gen_rtx_SET (dest,
24400 gen_rtx_IF_THEN_ELSE (result_mode,
24401 gen_rtx_GE (VOIDmode,
24402 op0, op1),
24403 true_cond, false_cond)));
24404 return 1;
24407 /* Same as above, but for ints (isel). */
24409 static int
24410 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24412 rtx condition_rtx, cr;
24413 machine_mode mode = GET_MODE (dest);
24414 enum rtx_code cond_code;
24415 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
24416 bool signedp;
24418 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
24419 return 0;
24421 /* We still have to do the compare, because isel doesn't do a
24422 compare; it just looks at the CRx bits set by a previous compare
24423 instruction. */
24424 condition_rtx = rs6000_generate_compare (op, mode);
24425 cond_code = GET_CODE (condition_rtx);
24426 cr = XEXP (condition_rtx, 0);
24427 signedp = GET_MODE (cr) == CCmode;
24429 isel_func = (mode == SImode
24430 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
24431 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
24433 switch (cond_code)
24435 case LT: case GT: case LTU: case GTU: case EQ:
24436 /* isel handles these directly. */
24437 break;
24439 default:
24440 /* We need to swap the sense of the comparison. */
24442 std::swap (false_cond, true_cond);
24443 PUT_CODE (condition_rtx, reverse_condition (cond_code));
24445 break;
24448 false_cond = force_reg (mode, false_cond);
24449 if (true_cond != const0_rtx)
24450 true_cond = force_reg (mode, true_cond);
24452 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
24454 return 1;
24457 const char *
24458 output_isel (rtx *operands)
24460 enum rtx_code code;
24462 code = GET_CODE (operands[1]);
24464 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
24466 gcc_assert (GET_CODE (operands[2]) == REG
24467 && GET_CODE (operands[3]) == REG);
24468 PUT_CODE (operands[1], reverse_condition (code));
24469 return "isel %0,%3,%2,%j1";
24472 return "isel %0,%2,%3,%j1";
24475 void
24476 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
24478 machine_mode mode = GET_MODE (op0);
24479 enum rtx_code c;
24480 rtx target;
24482 /* VSX/altivec have direct min/max insns. */
24483 if ((code == SMAX || code == SMIN)
24484 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
24485 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
24487 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
24488 return;
24491 if (code == SMAX || code == SMIN)
24492 c = GE;
24493 else
24494 c = GEU;
24496 if (code == SMAX || code == UMAX)
24497 target = emit_conditional_move (dest, c, op0, op1, mode,
24498 op0, op1, mode, 0);
24499 else
24500 target = emit_conditional_move (dest, c, op0, op1, mode,
24501 op1, op0, mode, 0);
24502 gcc_assert (target);
24503 if (target != dest)
24504 emit_move_insn (dest, target);
24507 /* Split a signbit operation on 64-bit machines with direct move. Also allow
24508 for the value to come from memory, or to be already loaded into a GPR. */
24510 void
24511 rs6000_split_signbit (rtx dest, rtx src)
24513 machine_mode d_mode = GET_MODE (dest);
24514 machine_mode s_mode = GET_MODE (src);
24515 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
24516 rtx shift_reg = dest_di;
24518 gcc_assert (REG_P (dest));
24519 gcc_assert (REG_P (src) || MEM_P (src));
24520 gcc_assert (s_mode == KFmode || s_mode == TFmode);
24522 if (MEM_P (src))
24524 rtx mem = (WORDS_BIG_ENDIAN
24525 ? adjust_address (src, DImode, 0)
24526 : adjust_address (src, DImode, 8));
24527 emit_insn (gen_rtx_SET (dest_di, mem));
24530 else
24532 unsigned int r = REGNO (src);
24534 /* If this is a VSX register, generate the special mfvsrd instruction
24535 to get it in a GPR. Until we support SF and DF modes, that will
24536 always be true. */
24537 gcc_assert (VSX_REGNO_P (r));
24539 if (s_mode == KFmode)
24540 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
24541 else
24542 emit_insn (gen_signbittf2_dm2 (dest_di, src));
24545 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
24546 return;
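/* So for a KFmode value in memory the sign bit is extracted roughly as:

	ld    r3,0(r4)		# most-significant doubleword (offset 8 on LE)
	srdi  r3,r3,63

   and when the value is already in a VSX register, an mfvsrd-based
   pattern replaces the load.  */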
24549 /* A subroutine of the atomic operation splitters. Jump to LABEL if
24550 COND is true. Mark the jump as unlikely to be taken. */
24552 static void
24553 emit_unlikely_jump (rtx cond, rtx label)
24555 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
24556 rtx x;
24558 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
24559 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
24560 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
24563 /* A subroutine of the atomic operation splitters. Emit a load-locked
24564 instruction in MODE. For QI/HImode, possibly use a pattern that includes
24565 the zero_extend operation. */
24567 static void
24568 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
24570 rtx (*fn) (rtx, rtx) = NULL;
24572 switch (mode)
24574 case QImode:
24575 fn = gen_load_lockedqi;
24576 break;
24577 case HImode:
24578 fn = gen_load_lockedhi;
24579 break;
24580 case SImode:
24581 if (GET_MODE (mem) == QImode)
24582 fn = gen_load_lockedqi_si;
24583 else if (GET_MODE (mem) == HImode)
24584 fn = gen_load_lockedhi_si;
24585 else
24586 fn = gen_load_lockedsi;
24587 break;
24588 case DImode:
24589 fn = gen_load_lockeddi;
24590 break;
24591 case TImode:
24592 fn = gen_load_lockedti;
24593 break;
24594 default:
24595 gcc_unreachable ();
24597 emit_insn (fn (reg, mem));
24600 /* A subroutine of the atomic operation splitters. Emit a store-conditional
24601 instruction in MODE. */
24603 static void
24604 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
24606 rtx (*fn) (rtx, rtx, rtx) = NULL;
24608 switch (mode)
24610 case QImode:
24611 fn = gen_store_conditionalqi;
24612 break;
24613 case HImode:
24614 fn = gen_store_conditionalhi;
24615 break;
24616 case SImode:
24617 fn = gen_store_conditionalsi;
24618 break;
24619 case DImode:
24620 fn = gen_store_conditionaldi;
24621 break;
24622 case TImode:
24623 fn = gen_store_conditionalti;
24624 break;
24625 default:
24626 gcc_unreachable ();
24629 /* Emit sync before stwcx. to address PPC405 Erratum. */
24630 if (PPC405_ERRATUM77)
24631 emit_insn (gen_hwsync ());
24633 emit_insn (fn (res, mem, val));
24636 /* Expand barriers before and after a load_locked/store_cond sequence. */
24638 static rtx
24639 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
24641 rtx addr = XEXP (mem, 0);
24642 int strict_p = (reload_in_progress || reload_completed);
24644 if (!legitimate_indirect_address_p (addr, strict_p)
24645 && !legitimate_indexed_address_p (addr, strict_p))
24647 addr = force_reg (Pmode, addr);
24648 mem = replace_equiv_address_nv (mem, addr);
24651 switch (model)
24653 case MEMMODEL_RELAXED:
24654 case MEMMODEL_CONSUME:
24655 case MEMMODEL_ACQUIRE:
24656 break;
24657 case MEMMODEL_RELEASE:
24658 case MEMMODEL_ACQ_REL:
24659 emit_insn (gen_lwsync ());
24660 break;
24661 case MEMMODEL_SEQ_CST:
24662 emit_insn (gen_hwsync ());
24663 break;
24664 default:
24665 gcc_unreachable ();
24667 return mem;
24670 static void
24671 rs6000_post_atomic_barrier (enum memmodel model)
24673 switch (model)
24675 case MEMMODEL_RELAXED:
24676 case MEMMODEL_CONSUME:
24677 case MEMMODEL_RELEASE:
24678 break;
24679 case MEMMODEL_ACQUIRE:
24680 case MEMMODEL_ACQ_REL:
24681 case MEMMODEL_SEQ_CST:
24682 emit_insn (gen_isync ());
24683 break;
24684 default:
24685 gcc_unreachable ();
24689 /* A subroutine of the various atomic expanders. For sub-word operations,
24690 we must adjust things to operate on SImode. Given the original MEM,
24691 return a new aligned memory. Also build and return the quantities by
24692 which to shift and mask. */
24694 static rtx
24695 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
24697 rtx addr, align, shift, mask, mem;
24698 HOST_WIDE_INT shift_mask;
24699 machine_mode mode = GET_MODE (orig_mem);
24701 /* For smaller modes, we have to implement this via SImode. */
24702 shift_mask = (mode == QImode ? 0x18 : 0x10);
24704 addr = XEXP (orig_mem, 0);
24705 addr = force_reg (GET_MODE (addr), addr);
24707 /* Aligned memory containing subword. Generate a new memory. We
24708 do not want any of the existing MEM_ATTR data, as we're now
24709 accessing memory outside the original object. */
24710 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
24711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24712 mem = gen_rtx_MEM (SImode, align);
24713 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
24714 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
24715 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
24717 /* Shift amount for subword relative to aligned word. */
24718 shift = gen_reg_rtx (SImode);
24719 addr = gen_lowpart (SImode, addr);
24720 rtx tmp = gen_reg_rtx (SImode);
24721 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
24722 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
24723 if (BYTES_BIG_ENDIAN)
24724 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
24725 shift, 1, OPTAB_LIB_WIDEN);
24726 *pshift = shift;
24728 /* Mask for insertion. */
24729 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
24730 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
24731 *pmask = mask;
24733 return mem;
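/* Worked example: a QImode access at an address A with (A & 3) == 3 uses
   the aligned word at A & -4; the shift is (A << 3) & 0x18 = 24 on
   little-endian (XORed with 0x18 to give 0 on big-endian), and the
   insertion mask is 0xff << shift.  */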
24736 /* A subroutine of the various atomic expanders. For sub-word operands,
24737 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
24739 static rtx
24740 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
24742 rtx x;
24744 x = gen_reg_rtx (SImode);
24745 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
24746 gen_rtx_NOT (SImode, mask),
24747 oldval)));
24749 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
24751 return x;
24754 /* A subroutine of the various atomic expanders. For sub-word operands,
24755 extract WIDE to NARROW via SHIFT. */
24757 static void
24758 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
24760 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
24761 wide, 1, OPTAB_LIB_WIDEN);
24762 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
24765 /* Expand an atomic compare and swap operation. */
24767 void
24768 rs6000_expand_atomic_compare_and_swap (rtx operands[])
24770 rtx boolval, retval, mem, oldval, newval, cond;
24771 rtx label1, label2, x, mask, shift;
24772 machine_mode mode, orig_mode;
24773 enum memmodel mod_s, mod_f;
24774 bool is_weak;
24776 boolval = operands[0];
24777 retval = operands[1];
24778 mem = operands[2];
24779 oldval = operands[3];
24780 newval = operands[4];
24781 is_weak = (INTVAL (operands[5]) != 0);
24782 mod_s = memmodel_base (INTVAL (operands[6]));
24783 mod_f = memmodel_base (INTVAL (operands[7]));
24784 orig_mode = mode = GET_MODE (mem);
24786 mask = shift = NULL_RTX;
24787 if (mode == QImode || mode == HImode)
24789 /* Before power8, we didn't have access to lbarx/lharx, so generate a
24790 lwarx and shift/mask operations. With power8, we need to do the
24791 comparison in SImode, but the store is still done in QI/HImode. */
24792 oldval = convert_modes (SImode, mode, oldval, 1);
24794 if (!TARGET_SYNC_HI_QI)
24796 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24798 /* Shift and mask OLDVAL into position within the word. */
24799 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
24800 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24802 /* Shift and mask NEWVAL into position within the word. */
24803 newval = convert_modes (SImode, mode, newval, 1);
24804 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
24805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24808 /* Prepare to adjust the return value. */
24809 retval = gen_reg_rtx (SImode);
24810 mode = SImode;
24812 else if (reg_overlap_mentioned_p (retval, oldval))
24813 oldval = copy_to_reg (oldval);
24815 if (mode != TImode && !reg_or_short_operand (oldval, mode))
24816 oldval = copy_to_mode_reg (mode, oldval);
24818 if (reg_overlap_mentioned_p (retval, newval))
24819 newval = copy_to_reg (newval);
24821 mem = rs6000_pre_atomic_barrier (mem, mod_s);
24823 label1 = NULL_RTX;
24824 if (!is_weak)
24826 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24827 emit_label (XEXP (label1, 0));
24829 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24831 emit_load_locked (mode, retval, mem);
24833 x = retval;
24834 if (mask)
24835 x = expand_simple_binop (SImode, AND, retval, mask,
24836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24838 cond = gen_reg_rtx (CCmode);
24839 /* If we have TImode, synthesize a comparison. */
24840 if (mode != TImode)
24841 x = gen_rtx_COMPARE (CCmode, x, oldval);
24842 else
24844 rtx xor1_result = gen_reg_rtx (DImode);
24845 rtx xor2_result = gen_reg_rtx (DImode);
24846 rtx or_result = gen_reg_rtx (DImode);
24847 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
24848 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
24849 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
24850 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
24852 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
24853 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
24854 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
24855 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
24858 emit_insn (gen_rtx_SET (cond, x));
24860 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24861 emit_unlikely_jump (x, label2);
24863 x = newval;
24864 if (mask)
24865 x = rs6000_mask_atomic_subword (retval, newval, mask);
24867 emit_store_conditional (orig_mode, cond, mem, x);
24869 if (!is_weak)
24871 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24872 emit_unlikely_jump (x, label1);
24875 if (!is_mm_relaxed (mod_f))
24876 emit_label (XEXP (label2, 0));
24878 rs6000_post_atomic_barrier (mod_s);
24880 if (is_mm_relaxed (mod_f))
24881 emit_label (XEXP (label2, 0));
24883 if (shift)
24884 rs6000_finish_atomic_subword (operands[1], retval, shift);
24885 else if (mode != GET_MODE (operands[1]))
24886 convert_move (operands[1], retval, 1);
24888 /* In all cases, CR0 contains EQ on success, and NE on failure. */
24889 x = gen_rtx_EQ (SImode, cond, const0_rtx);
24890 emit_insn (gen_rtx_SET (boolval, x));
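/* For a strong SImode compare-and-swap the expansion corresponds to
   roughly this loop, plus the barriers the memory model asks for
   (register choices illustrative):

	.L1:	lwarx  r9,0,r3
		cmpw   0,r9,r4
		bne-   0,.L2
		stwcx. r5,0,r3
		bne-   0,.L1
	.L2:  */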
24893 /* Expand an atomic exchange operation. */
24895 void
24896 rs6000_expand_atomic_exchange (rtx operands[])
24898 rtx retval, mem, val, cond;
24899 machine_mode mode;
24900 enum memmodel model;
24901 rtx label, x, mask, shift;
24903 retval = operands[0];
24904 mem = operands[1];
24905 val = operands[2];
24906 model = memmodel_base (INTVAL (operands[3]));
24907 mode = GET_MODE (mem);
24909 mask = shift = NULL_RTX;
24910 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
24912 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24914 /* Shift and mask VAL into position within the word. */
24915 val = convert_modes (SImode, mode, val, 1);
24916 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24919 /* Prepare to adjust the return value. */
24920 retval = gen_reg_rtx (SImode);
24921 mode = SImode;
24924 mem = rs6000_pre_atomic_barrier (mem, model);
24926 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24927 emit_label (XEXP (label, 0));
24929 emit_load_locked (mode, retval, mem);
24931 x = val;
24932 if (mask)
24933 x = rs6000_mask_atomic_subword (retval, val, mask);
24935 cond = gen_reg_rtx (CCmode);
24936 emit_store_conditional (mode, cond, mem, x);
24938 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24939 emit_unlikely_jump (x, label);
24941 rs6000_post_atomic_barrier (model);
24943 if (shift)
24944 rs6000_finish_atomic_subword (operands[0], retval, shift);
24947 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
24948 to perform. MEM is the memory on which to operate. VAL is the second
24949 operand of the binary operator. BEFORE and AFTER are optional locations to
24950 return the value of MEM either before or after the operation. MODEL_RTX
24951 is a CONST_INT containing the memory model to use. */
24953 void
24954 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
24955 rtx orig_before, rtx orig_after, rtx model_rtx)
24957 enum memmodel model = memmodel_base (INTVAL (model_rtx));
24958 machine_mode mode = GET_MODE (mem);
24959 machine_mode store_mode = mode;
24960 rtx label, x, cond, mask, shift;
24961 rtx before = orig_before, after = orig_after;
24963 mask = shift = NULL_RTX;
24964 /* On power8, we want to use SImode for the operation. On previous systems,
24965 use the operation in a subword and shift/mask to get the proper byte or
24966 halfword. */
24967 if (mode == QImode || mode == HImode)
24969 if (TARGET_SYNC_HI_QI)
24971 val = convert_modes (SImode, mode, val, 1);
24973 /* Prepare to adjust the return value. */
24974 before = gen_reg_rtx (SImode);
24975 if (after)
24976 after = gen_reg_rtx (SImode);
24977 mode = SImode;
24979 else
24981 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24984 /* Shift and mask VAL into position within the word. */
24984 val = convert_modes (SImode, mode, val, 1);
24985 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24986 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24988 switch (code)
24990 case IOR:
24991 case XOR:
24992 /* We've already zero-extended VAL. That is sufficient to
24993 make certain that it does not affect other bits. */
24994 mask = NULL;
24995 break;
24997 case AND:
24998 /* If we make certain that all of the other bits in VAL are
24999 set, that will be sufficient to not affect other bits. */
25000 x = gen_rtx_NOT (SImode, mask);
25001 x = gen_rtx_IOR (SImode, x, val);
25002 emit_insn (gen_rtx_SET (val, x));
25003 mask = NULL;
25004 break;
25006 case NOT:
25007 case PLUS:
25008 case MINUS:
25009 /* These will all affect bits outside the field and need
25010 adjustment via MASK within the loop. */
25011 break;
25013 default:
25014 gcc_unreachable ();
25017 /* Prepare to adjust the return value. */
25018 before = gen_reg_rtx (SImode);
25019 if (after)
25020 after = gen_reg_rtx (SImode);
25021 store_mode = mode = SImode;
25025 mem = rs6000_pre_atomic_barrier (mem, model);
25027 label = gen_label_rtx ();
25028 emit_label (label);
25029 label = gen_rtx_LABEL_REF (VOIDmode, label);
25031 if (before == NULL_RTX)
25032 before = gen_reg_rtx (mode);
25034 emit_load_locked (mode, before, mem);
25036 if (code == NOT)
25038 x = expand_simple_binop (mode, AND, before, val,
25039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25040 after = expand_simple_unop (mode, NOT, x, after, 1);
25042 else
25044 after = expand_simple_binop (mode, code, before, val,
25045 after, 1, OPTAB_LIB_WIDEN);
25048 x = after;
25049 if (mask)
25051 x = expand_simple_binop (SImode, AND, after, mask,
25052 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25053 x = rs6000_mask_atomic_subword (before, x, mask);
25055 else if (store_mode != mode)
25056 x = convert_modes (store_mode, mode, x, 1);
25058 cond = gen_reg_rtx (CCmode);
25059 emit_store_conditional (store_mode, cond, mem, x);
25061 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25062 emit_unlikely_jump (x, label);
25064 rs6000_post_atomic_barrier (model);
25066 if (shift)
25068 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
25069 then do the calculations in a SImode register. */
25070 if (orig_before)
25071 rs6000_finish_atomic_subword (orig_before, before, shift);
25072 if (orig_after)
25073 rs6000_finish_atomic_subword (orig_after, after, shift);
25075 else if (store_mode != mode)
25077 /* QImode/HImode on machines with lbarx/lharx where we do the native
25078 operation and then do the calculations in a SImode register. */
25079 if (orig_before)
25080 convert_move (orig_before, before, 1);
25081 if (orig_after)
25082 convert_move (orig_after, after, 1);
25084 else if (orig_after && after != orig_after)
25085 emit_move_insn (orig_after, after);
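/* E.g. an SImode __atomic_fetch_add expands, modulo barriers, to:

	.L1:	lwarx  r9,0,r3
		add    r10,r9,r4
		stwcx. r10,0,r3
		bne-   0,.L1

   where r9 holds the "before" value and r10 the "after" value
   (illustrative registers).  */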
25088 /* Emit instructions to move SRC to DST. Called by splitters for
25089 multi-register moves. It will emit at most one instruction for
25090 each register that is accessed; that is, it won't emit li/lis pairs
25091 (or equivalent for 64-bit code). One of SRC or DST must be a hard
25092 register. */
25094 void
25095 rs6000_split_multireg_move (rtx dst, rtx src)
25097 /* The register number of the first register being moved. */
25098 int reg;
25099 /* The mode that is to be moved. */
25100 machine_mode mode;
25101 /* The mode that the move is being done in, and its size. */
25102 machine_mode reg_mode;
25103 int reg_mode_size;
25104 /* The number of registers that will be moved. */
25105 int nregs;
25107 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
25108 mode = GET_MODE (dst);
25109 nregs = hard_regno_nregs[reg][mode];
25110 if (FP_REGNO_P (reg))
25111 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
25112 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
25113 else if (ALTIVEC_REGNO_P (reg))
25114 reg_mode = V16QImode;
25115 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
25116 reg_mode = DFmode;
25117 else
25118 reg_mode = word_mode;
25119 reg_mode_size = GET_MODE_SIZE (reg_mode);
25121 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
25123 /* TDmode residing in FP registers is special, since the ISA requires that
25124 the lower-numbered word of a register pair is always the most significant
25125 word, even in little-endian mode. This does not match the usual subreg
25126 semantics, so we cannot use simplify_gen_subreg in those cases. Access
25127 the appropriate constituent registers "by hand" in little-endian mode.
25129 Note we do not need to check for destructive overlap here since TDmode
25130 can only reside in even/odd register pairs. */
25131 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
25133 rtx p_src, p_dst;
25134 int i;
25136 for (i = 0; i < nregs; i++)
25138 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
25139 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
25140 else
25141 p_src = simplify_gen_subreg (reg_mode, src, mode,
25142 i * reg_mode_size);
25144 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
25145 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
25146 else
25147 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
25148 i * reg_mode_size);
25150 emit_insn (gen_rtx_SET (p_dst, p_src));
25153 return;
25156 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
25158 /* Move register range backwards, if we might have destructive
25159 overlap. */
25160 int i;
25161 for (i = nregs - 1; i >= 0; i--)
25162 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25163 i * reg_mode_size),
25164 simplify_gen_subreg (reg_mode, src, mode,
25165 i * reg_mode_size)));
25167 else
25169 int i;
25170 int j = -1;
25171 bool used_update = false;
25172 rtx restore_basereg = NULL_RTX;
25174 if (MEM_P (src) && INT_REGNO_P (reg))
25176 rtx breg;
25178 if (GET_CODE (XEXP (src, 0)) == PRE_INC
25179 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
25181 rtx delta_rtx;
25182 breg = XEXP (XEXP (src, 0), 0);
25183 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
25184 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
25185 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
25186 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25187 src = replace_equiv_address (src, breg);
25189 else if (! rs6000_offsettable_memref_p (src, reg_mode))
25191 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
25193 rtx basereg = XEXP (XEXP (src, 0), 0);
25194 if (TARGET_UPDATE)
25196 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
25197 emit_insn (gen_rtx_SET (ndst,
25198 gen_rtx_MEM (reg_mode,
25199 XEXP (src, 0))));
25200 used_update = true;
25202 else
25203 emit_insn (gen_rtx_SET (basereg,
25204 XEXP (XEXP (src, 0), 1)));
25205 src = replace_equiv_address (src, basereg);
25207 else
25209 rtx basereg = gen_rtx_REG (Pmode, reg);
25210 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
25211 src = replace_equiv_address (src, basereg);
25215 breg = XEXP (src, 0);
25216 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
25217 breg = XEXP (breg, 0);
25219 /* If the base register we are using to address memory is
25220 also a destination reg, then change that register last. */
25221 if (REG_P (breg)
25222 && REGNO (breg) >= REGNO (dst)
25223 && REGNO (breg) < REGNO (dst) + nregs)
25224 j = REGNO (breg) - REGNO (dst);
25226 else if (MEM_P (dst) && INT_REGNO_P (reg))
25228 rtx breg;
25230 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
25231 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
25233 rtx delta_rtx;
25234 breg = XEXP (XEXP (dst, 0), 0);
25235 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
25236 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
25237 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
25239 /* We have to update the breg before doing the store.
25240 Use store with update, if available. */
25242 if (TARGET_UPDATE)
25244 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25245 emit_insn (TARGET_32BIT
25246 ? (TARGET_POWERPC64
25247 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
25248 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
25249 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
25250 used_update = true;
25252 else
25253 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
25254 dst = replace_equiv_address (dst, breg);
25256 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
25257 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
25259 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
25261 rtx basereg = XEXP (XEXP (dst, 0), 0);
25262 if (TARGET_UPDATE)
25264 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
25265 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
25266 XEXP (dst, 0)),
25267 nsrc));
25268 used_update = true;
25270 else
25271 emit_insn (gen_rtx_SET (basereg,
25272 XEXP (XEXP (dst, 0), 1)));
25273 dst = replace_equiv_address (dst, basereg);
25275 else
25277 rtx basereg = XEXP (XEXP (dst, 0), 0);
25278 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
25279 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
25280 && REG_P (basereg)
25281 && REG_P (offsetreg)
25282 && REGNO (basereg) != REGNO (offsetreg));
25283 if (REGNO (basereg) == 0)
25285 rtx tmp = offsetreg;
25286 offsetreg = basereg;
25287 basereg = tmp;
25289 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
25290 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
25291 dst = replace_equiv_address (dst, basereg);
25294 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
25295 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
25298 for (i = 0; i < nregs; i++)
25300 /* Calculate index to next subword. */
25301 ++j;
25302 if (j == nregs)
25303 j = 0;
25305 /* If compiler already emitted move of first word by
25306 store with update, no need to do anything. */
25307 if (j == 0 && used_update)
25308 continue;
25310 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
25311 j * reg_mode_size),
25312 simplify_gen_subreg (reg_mode, src, mode,
25313 j * reg_mode_size)));
25315 if (restore_basereg != NULL_RTX)
25316 emit_insn (restore_basereg);
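/* For instance, a TImode register-to-register move on 64-bit code is
   split into two DImode sets, emitted last-register-first when the
   source and destination ranges overlap destructively.  */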
25321 /* This page contains routines that are used to determine what the
25322 function prologue and epilogue code will do and write them out. */
25324 static inline bool
25325 save_reg_p (int r)
25327 return !call_used_regs[r] && df_regs_ever_live_p (r);
25330 /* Determine whether the gp REG is really used. */
25332 static bool
25333 rs6000_reg_live_or_pic_offset_p (int reg)
25335 /* We need to mark the PIC offset register live under the same conditions
25336 in which it is set up; otherwise it won't be saved before we clobber it. */
25338 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
25340 if (TARGET_TOC && TARGET_MINIMAL_TOC
25341 && (crtl->calls_eh_return
25342 || df_regs_ever_live_p (reg)
25343 || get_pool_size ()))
25344 return true;
25346 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
25347 && flag_pic)
25348 return true;
25351 /* If the function calls eh_return, claim used all the registers that would
25352 be checked for liveness otherwise. */
25354 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
25355 && !call_used_regs[reg]);
25358 /* Return the first fixed-point register that is required to be
25359 saved. 32 if none. */
25362 first_reg_to_save (void)
25364 int first_reg;
25366 /* Find lowest numbered live register. */
25367 for (first_reg = 13; first_reg <= 31; first_reg++)
25368 if (save_reg_p (first_reg))
25369 break;
25371 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
25372 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
25373 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25374 || (TARGET_TOC && TARGET_MINIMAL_TOC))
25375 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
25376 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
25378 #if TARGET_MACHO
25379 if (flag_pic
25380 && crtl->uses_pic_offset_table
25381 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
25382 return RS6000_PIC_OFFSET_TABLE_REGNUM;
25383 #endif
25385 return first_reg;
25388 /* Similar, for FP regs. */
25391 first_fp_reg_to_save (void)
25393 int first_reg;
25395 /* Find lowest numbered live register. */
25396 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
25397 if (save_reg_p (first_reg))
25398 break;
25400 return first_reg;
25403 /* Similar, for AltiVec regs. */
25405 static int
25406 first_altivec_reg_to_save (void)
25408 int i;
25410 /* Stack frame remains as is unless we are in AltiVec ABI. */
25411 if (! TARGET_ALTIVEC_ABI)
25412 return LAST_ALTIVEC_REGNO + 1;
25414 /* On Darwin, the unwind routines are compiled without
25415 TARGET_ALTIVEC, and use save_world to save/restore the
25416 altivec registers when necessary. */
25417 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25418 && ! TARGET_ALTIVEC)
25419 return FIRST_ALTIVEC_REGNO + 20;
25421 /* Find lowest numbered live register. */
25422 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
25423 if (save_reg_p (i))
25424 break;
25426 return i;
25429 /* Return a 32-bit mask of the AltiVec registers we need to set in
25430 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
25431 the 32-bit word is 0. */
25433 static unsigned int
25434 compute_vrsave_mask (void)
25436 unsigned int i, mask = 0;
25438 /* On Darwin, the unwind routines are compiled without
25439 TARGET_ALTIVEC, and use save_world to save/restore the
25440 call-saved altivec registers when necessary. */
25441 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25442 && ! TARGET_ALTIVEC)
25443 mask |= 0xFFF;
25445 /* First, find out if we use _any_ altivec registers. */
25446 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25447 if (df_regs_ever_live_p (i))
25448 mask |= ALTIVEC_REG_BIT (i);
25450 if (mask == 0)
25451 return mask;
25453 /* Next, remove the argument registers from the set. These must
25454 be in the VRSAVE mask set by the caller, so we don't need to add
25455 them in again. More importantly, the mask we compute here is
25456 used to generate CLOBBERs in the set_vrsave insn, and we do not
25457 wish the argument registers to die. */
25458 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
25459 mask &= ~ALTIVEC_REG_BIT (i);
25461 /* Similarly, remove the return value from the set. */
25463 bool yes = false;
25464 diddle_return_value (is_altivec_return_reg, &yes);
25465 if (yes)
25466 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
25469 return mask;
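/* For illustration (a sketch, not part of the build): ALTIVEC_REG_BIT
   puts V0 in the most-significant bit, so a function whose only live
   vector registers are the call-saved v20..v31 gets

     mask = ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20)
	    | ... | ALTIVEC_REG_BIT (LAST_ALTIVEC_REGNO)
	  = 0x00000FFF,

   matching the 0xFFF constant hard-coded for the Darwin eh_return
   case above.  */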
25472 /* For a very restricted set of circumstances, we can cut down the
25473 size of prologues/epilogues by calling our own save/restore-the-world
25474 routines. */
25476 static void
25477 compute_save_world_info (rs6000_stack_t *info)
25479 info->world_save_p = 1;
25480 info->world_save_p
25481 = (WORLD_SAVE_P (info)
25482 && DEFAULT_ABI == ABI_DARWIN
25483 && !cfun->has_nonlocal_label
25484 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
25485 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
25486 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
25487 && info->cr_save_p);
25489 /* This will not work in conjunction with sibcalls. Make sure there
25490 are none. (This check is expensive, but seldom executed.) */
25491 if (WORLD_SAVE_P (info))
25493 rtx_insn *insn;
25494 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
25495 if (CALL_P (insn) && SIBLING_CALL_P (insn))
25497 info->world_save_p = 0;
25498 break;
25502 if (WORLD_SAVE_P (info))
25504 /* Even if we're not touching VRsave, make sure there's room on the
25505 stack for it, if it looks like we're calling SAVE_WORLD, which
25506 will attempt to save it. */
25507 info->vrsave_size = 4;
25509 /* If we are going to save the world, we need to save the link register too. */
25510 info->lr_save_p = 1;
25512 /* "Save" the VRsave register too if we're saving the world. */
25513 if (info->vrsave_mask == 0)
25514 info->vrsave_mask = compute_vrsave_mask ();
25516 /* Because the Darwin register save/restore routines only handle
25517 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
25518 check. */
25519 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
25520 && (info->first_altivec_reg_save
25521 >= FIRST_SAVED_ALTIVEC_REGNO));
25524 return;
25528 static void
25529 is_altivec_return_reg (rtx reg, void *xyes)
25531 bool *yes = (bool *) xyes;
25532 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
25533 *yes = true;
25537 /* Return whether REG is a global user reg or has been specified by
25538 -ffixed-REG. We should not restore these, and so cannot use
25539 lmw or out-of-line restore functions if there are any. We also
25540 can't save them (well, emit frame notes for them), because frame
25541 unwinding during exception handling will restore saved registers. */
25543 static bool
25544 fixed_reg_p (int reg)
25546 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
25547 backend sets it, overriding anything the user might have given. */
25548 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
25549 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
25550 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25551 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
25552 return false;
25554 return fixed_regs[reg];
25557 /* Determine the strategy for saving/restoring registers. */
25559 enum {
25560 SAVE_MULTIPLE = 0x1,
25561 SAVE_INLINE_GPRS = 0x2,
25562 SAVE_INLINE_FPRS = 0x4,
25563 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
25564 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
25565 SAVE_INLINE_VRS = 0x20,
25566 REST_MULTIPLE = 0x100,
25567 REST_INLINE_GPRS = 0x200,
25568 REST_INLINE_FPRS = 0x400,
25569 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
25570 REST_INLINE_VRS = 0x1000
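/* Illustrative decoding of the flags above (not part of the build):
   a 32-bit function that uses stmw/lmw for the GPRs and handles FPRs
   and VRs in line could end up with

     strategy = SAVE_MULTIPLE | SAVE_INLINE_GPRS | SAVE_INLINE_FPRS
		| SAVE_INLINE_VRS | REST_MULTIPLE | REST_INLINE_GPRS
		| REST_INLINE_FPRS | REST_INLINE_VRS;

   i.e. 0x1727, which is the value debug_stack_info would print as
   the save-strategy.  */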
25573 static int
25574 rs6000_savres_strategy (rs6000_stack_t *info,
25575 bool using_static_chain_p)
25577 int strategy = 0;
25579 /* Select between in-line and out-of-line save and restore of regs.
25580 First, all the obvious cases where we don't use out-of-line. */
25581 if (crtl->calls_eh_return
25582 || cfun->machine->ra_need_lr)
25583 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
25584 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
25585 | SAVE_INLINE_VRS | REST_INLINE_VRS);
25587 if (info->first_gp_reg_save == 32)
25588 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25590 if (info->first_fp_reg_save == 64
25591 /* The out-of-line FP routines use double-precision stores;
25592 we can't use those routines if we don't have such stores. */
25593 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
25594 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25596 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
25597 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25599 /* Define cutoff for using out-of-line functions to save registers. */
25600 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
25602 if (!optimize_size)
25604 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25605 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25606 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25608 else
25610 /* Prefer out-of-line restore if it will exit. */
25611 if (info->first_fp_reg_save > 61)
25612 strategy |= SAVE_INLINE_FPRS;
25613 if (info->first_gp_reg_save > 29)
25615 if (info->first_fp_reg_save == 64)
25616 strategy |= SAVE_INLINE_GPRS;
25617 else
25618 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25620 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
25621 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25624 else if (DEFAULT_ABI == ABI_DARWIN)
25626 if (info->first_fp_reg_save > 60)
25627 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25628 if (info->first_gp_reg_save > 29)
25629 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25630 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25632 else
25634 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25635 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
25636 || info->first_fp_reg_save > 61)
25637 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25638 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25639 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25642 /* Don't bother to try to save things out-of-line if r11 is occupied
25643 by the static chain. It would require too much fiddling and the
25644 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
25645 pointer on Darwin, and AIX uses r1 or r12. */
25646 if (using_static_chain_p
25647 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25648 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
25649 | SAVE_INLINE_GPRS
25650 | SAVE_INLINE_VRS);
25652 /* Saving CR interferes with the exit routines used on the SPE, so
25653 just punt here. */
25654 if (TARGET_SPE_ABI
25655 && info->spe_64bit_regs_used
25656 && info->cr_save_p)
25657 strategy |= REST_INLINE_GPRS;
25659 /* We can only use the out-of-line routines to restore fprs if we've
25660 saved all the registers from first_fp_reg_save in the prologue.
25661 Otherwise, we risk loading garbage. Of course, if we have saved
25662 out-of-line then we know we haven't skipped any fprs. */
25663 if ((strategy & SAVE_INLINE_FPRS)
25664 && !(strategy & REST_INLINE_FPRS))
25666 int i;
25668 for (i = info->first_fp_reg_save; i < 64; i++)
25669 if (fixed_regs[i] || !save_reg_p (i))
25671 strategy |= REST_INLINE_FPRS;
25672 break;
25676 /* Similarly, for altivec regs. */
25677 if ((strategy & SAVE_INLINE_VRS)
25678 && !(strategy & REST_INLINE_VRS))
25680 int i;
25682 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
25683 if (fixed_regs[i] || !save_reg_p (i))
25685 strategy |= REST_INLINE_VRS;
25686 break;
25690 /* info->lr_save_p isn't yet set if the only reason lr needs to be
25691 saved is an out-of-line save or restore. Set up the value for
25692 the next test (excluding out-of-line gprs). */
25693 bool lr_save_p = (info->lr_save_p
25694 || !(strategy & SAVE_INLINE_FPRS)
25695 || !(strategy & SAVE_INLINE_VRS)
25696 || !(strategy & REST_INLINE_FPRS)
25697 || !(strategy & REST_INLINE_VRS));
25699 if (TARGET_MULTIPLE
25700 && !TARGET_POWERPC64
25701 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
25702 && info->first_gp_reg_save < 31
25703 && !(flag_shrink_wrap
25704 && flag_shrink_wrap_separate
25705 && optimize_function_for_speed_p (cfun)))
25707 /* Prefer store multiple for saves over out-of-line routines,
25708 since the store-multiple instruction will always be smaller. */
25709 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
25711 /* The situation is more complicated with load multiple. We'd
25712 prefer to use the out-of-line routines for restores, since the
25713 "exit" out-of-line routines can handle the restore of LR and the
25714 frame teardown. However it doesn't make sense to use the
25715 out-of-line routine if that is the only reason we'd need to save
25716 LR, and we can't use the "exit" out-of-line gpr restore if we
25717 have saved some fprs; in those cases it is advantageous to use
25718 load multiple when available. */
25719 if (info->first_fp_reg_save != 64 || !lr_save_p)
25720 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
25723 /* Using the "exit" out-of-line routine does not improve code size
25724 if using it would require lr to be saved and we are only saving
25725 one or two gprs. */
25726 else if (!lr_save_p && info->first_gp_reg_save > 29)
25727 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25729 /* We can only use load multiple or the out-of-line routines to
25730 restore gprs if we've saved all the registers from
25731 first_gp_reg_save. Otherwise, we risk loading garbage.
25732 Of course, if we have saved out-of-line or used stmw then we know
25733 we haven't skipped any gprs. */
25734 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
25735 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
25737 int i;
25739 for (i = info->first_gp_reg_save; i < 32; i++)
25740 if (fixed_reg_p (i) || !save_reg_p (i))
25742 strategy |= REST_INLINE_GPRS;
25743 strategy &= ~REST_MULTIPLE;
25744 break;
25748 if (TARGET_ELF && TARGET_64BIT)
25750 if (!(strategy & SAVE_INLINE_FPRS))
25751 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25752 else if (!(strategy & SAVE_INLINE_GPRS)
25753 && info->first_fp_reg_save == 64)
25754 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
25756 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
25757 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
25759 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
25760 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25762 return strategy;
25765 /* Calculate the stack information for the current function. This is
25766 complicated by having two separate calling sequences, the AIX calling
25767 sequence and the V.4 calling sequence.
25769 AIX (and Darwin/Mac OS X) stack frames look like:
25770 32-bit 64-bit
25771 SP----> +---------------------------------------+
25772 | back chain to caller | 0 0
25773 +---------------------------------------+
25774 | saved CR | 4 8 (8-11)
25775 +---------------------------------------+
25776 | saved LR | 8 16
25777 +---------------------------------------+
25778 | reserved for compilers | 12 24
25779 +---------------------------------------+
25780 | reserved for binders | 16 32
25781 +---------------------------------------+
25782 | saved TOC pointer | 20 40
25783 +---------------------------------------+
25784 | Parameter save area (P) | 24 48
25785 +---------------------------------------+
25786 | Alloca space (A) | 24+P etc.
25787 +---------------------------------------+
25788 | Local variable space (L) | 24+P+A
25789 +---------------------------------------+
25790 | Float/int conversion temporary (X) | 24+P+A+L
25791 +---------------------------------------+
25792 | Save area for AltiVec registers (W) | 24+P+A+L+X
25793 +---------------------------------------+
25794 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
25795 +---------------------------------------+
25796 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
25797 +---------------------------------------+
25798 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
25799 +---------------------------------------+
25800 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
25801 +---------------------------------------+
25802 old SP->| back chain to caller's caller |
25803 +---------------------------------------+
25805 The required alignment for AIX configurations is two words (i.e., 8
25806 or 16 bytes).
25808 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
25810 SP----> +---------------------------------------+
25811 | Back chain to caller | 0
25812 +---------------------------------------+
25813 | Save area for CR | 8
25814 +---------------------------------------+
25815 | Saved LR | 16
25816 +---------------------------------------+
25817 | Saved TOC pointer | 24
25818 +---------------------------------------+
25819 | Parameter save area (P) | 32
25820 +---------------------------------------+
25821 | Alloca space (A) | 32+P
25822 +---------------------------------------+
25823 | Local variable space (L) | 32+P+A
25824 +---------------------------------------+
25825 | Save area for AltiVec registers (W) | 32+P+A+L
25826 +---------------------------------------+
25827 | AltiVec alignment padding (Y) | 32+P+A+L+W
25828 +---------------------------------------+
25829 | Save area for GP registers (G) | 32+P+A+L+W+Y
25830 +---------------------------------------+
25831 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
25832 +---------------------------------------+
25833 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
25834 +---------------------------------------+
25837 V.4 stack frames look like:
25839 SP----> +---------------------------------------+
25840 | back chain to caller | 0
25841 +---------------------------------------+
25842 | caller's saved LR | 4
25843 +---------------------------------------+
25844 | Parameter save area (P) | 8
25845 +---------------------------------------+
25846 | Alloca space (A) | 8+P
25847 +---------------------------------------+
25848 | Varargs save area (V) | 8+P+A
25849 +---------------------------------------+
25850 | Local variable space (L) | 8+P+A+V
25851 +---------------------------------------+
25852 | Float/int conversion temporary (X) | 8+P+A+V+L
25853 +---------------------------------------+
25854 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
25855 +---------------------------------------+
25856 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
25857 +---------------------------------------+
25858 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
25859 +---------------------------------------+
25860 | SPE: area for 64-bit GP registers |
25861 +---------------------------------------+
25862 | SPE alignment padding |
25863 +---------------------------------------+
25864 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
25865 +---------------------------------------+
25866 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
25867 +---------------------------------------+
25868 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
25869 +---------------------------------------+
25870 old SP->| back chain to caller's caller |
25871 +---------------------------------------+
25873 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
25874 given. (But note below and in sysv4.h that we require only 8 and
25875 may round up the size of our stack frame anyway. The historical
25876 reason is early versions of powerpc-linux which didn't properly
25877 align the stack at program startup. A happy side-effect is that
25878 -mno-eabi libraries can be used with -meabi programs.)
25880 The EABI configuration defaults to the V.4 layout. However,
25881 the stack alignment requirements may differ. If -mno-eabi is not
25882 given, the required stack alignment is 8 bytes; if -mno-eabi is
25883 given, the required alignment is 16 bytes. (But see V.4 comment
25884 above.) */
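/* A worked instance of the 32-bit AIX layout above (sizes are
   hypothetical): with P = 64 bytes of parameter save area, no alloca
   (A = 0), L = 40 bytes of locals, and X = W = Y = Z = 0, the GP
   save area (G) starts at offset 24 + 64 + 0 + 40 = 128 from the new
   SP and the FP save area follows it at 128 + G; the frame size is
   then rounded up to the two-word alignment noted above.  */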
25886 #ifndef ABI_STACK_BOUNDARY
25887 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
25888 #endif
25890 static rs6000_stack_t *
25891 rs6000_stack_info (void)
25893 /* We should never be called for thunks, we are not set up for that. */
25894 gcc_assert (!cfun->is_thunk);
25896 rs6000_stack_t *info = &stack_info;
25897 int reg_size = TARGET_32BIT ? 4 : 8;
25898 int ehrd_size;
25899 int ehcr_size;
25900 int save_align;
25901 int first_gp;
25902 HOST_WIDE_INT non_fixed_size;
25903 bool using_static_chain_p;
25905 if (reload_completed && info->reload_completed)
25906 return info;
25908 memset (info, 0, sizeof (*info));
25909 info->reload_completed = reload_completed;
25911 if (TARGET_SPE)
25913 /* Cache value so we don't rescan instruction chain over and over. */
25914 if (cfun->machine->spe_insn_chain_scanned_p == 0)
25915 cfun->machine->spe_insn_chain_scanned_p
25916 = spe_func_has_64bit_regs_p () + 1;
25917 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
25920 /* Select which calling sequence. */
25921 info->abi = DEFAULT_ABI;
25923 /* Calculate which registers need to be saved & save area size. */
25924 info->first_gp_reg_save = first_reg_to_save ();
25925 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
25926 even if it currently looks like we won't. Reload may need it to
25927 get at a constant; if so, it will have already created a constant
25928 pool entry for it. */
25929 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
25930 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25931 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25932 && crtl->uses_const_pool
25933 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
25934 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
25935 else
25936 first_gp = info->first_gp_reg_save;
25938 info->gp_size = reg_size * (32 - first_gp);
25940 /* For the SPE, we have an additional upper 32-bits on each GPR.
25941 Ideally we should save the entire 64-bits only when the upper
25942 half is used in SIMD instructions. Since we only record
25943 registers live (not the size they are used in), this proves
25944 difficult because we'd have to traverse the instruction chain at
25945 the right time, taking reload into account. This is a real pain,
25946 so we opt to always save the GPRs in 64 bits if even one register
25947 gets used in 64 bits. Otherwise, all the registers in the frame
25948 get saved in 32 bits.
25950 So, when we save all GPRs (except the SP) in 64 bits, the
25951 traditional GP save area will be empty. */
25952 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25953 info->gp_size = 0;
25955 info->first_fp_reg_save = first_fp_reg_to_save ();
25956 info->fp_size = 8 * (64 - info->first_fp_reg_save);
25958 info->first_altivec_reg_save = first_altivec_reg_to_save ();
25959 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
25960 - info->first_altivec_reg_save);
25962 /* Does this function call anything? */
25963 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
25965 /* Determine if we need to save the condition code registers. */
25966 if (save_reg_p (CR2_REGNO)
25967 || save_reg_p (CR3_REGNO)
25968 || save_reg_p (CR4_REGNO))
25970 info->cr_save_p = 1;
25971 if (DEFAULT_ABI == ABI_V4)
25972 info->cr_size = reg_size;
25975 /* If the current function calls __builtin_eh_return, then we need
25976 to allocate stack space for registers that will hold data for
25977 the exception handler. */
25978 if (crtl->calls_eh_return)
25980 unsigned int i;
25981 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
25982 continue;
25984 /* SPE saves EH registers in 64-bits. */
25985 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
25986 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
25988 else
25989 ehrd_size = 0;
25991 /* In the ELFv2 ABI, we also need to allocate space for separate
25992 CR field save areas if the function calls __builtin_eh_return. */
25993 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25995 /* This hard-codes that we have three call-saved CR fields. */
25996 ehcr_size = 3 * reg_size;
25997 /* We do *not* use the regular CR save mechanism. */
25998 info->cr_save_p = 0;
26000 else
26001 ehcr_size = 0;
26003 /* Determine various sizes. */
26004 info->reg_size = reg_size;
26005 info->fixed_size = RS6000_SAVE_AREA;
26006 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
26007 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
26008 TARGET_ALTIVEC ? 16 : 8);
26009 if (FRAME_GROWS_DOWNWARD)
26010 info->vars_size
26011 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
26012 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
26013 - (info->fixed_size + info->vars_size + info->parm_size);
26015 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26016 info->spe_gp_size = 8 * (32 - first_gp);
26018 if (TARGET_ALTIVEC_ABI)
26019 info->vrsave_mask = compute_vrsave_mask ();
26021 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
26022 info->vrsave_size = 4;
26024 compute_save_world_info (info);
26026 /* Calculate the offsets. */
26027 switch (DEFAULT_ABI)
26029 case ABI_NONE:
26030 default:
26031 gcc_unreachable ();
26033 case ABI_AIX:
26034 case ABI_ELFv2:
26035 case ABI_DARWIN:
26036 info->fp_save_offset = -info->fp_size;
26037 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26039 if (TARGET_ALTIVEC_ABI)
26041 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
26043 /* Align stack so vector save area is on a quadword boundary.
26044 The padding goes above the vectors. */
26045 if (info->altivec_size != 0)
26046 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
26048 info->altivec_save_offset = info->vrsave_save_offset
26049 - info->altivec_padding_size
26050 - info->altivec_size;
26051 gcc_assert (info->altivec_size == 0
26052 || info->altivec_save_offset % 16 == 0);
26054 /* Adjust for AltiVec case. */
26055 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
26057 else
26058 info->ehrd_offset = info->gp_save_offset - ehrd_size;
26060 info->ehcr_offset = info->ehrd_offset - ehcr_size;
26061 info->cr_save_offset = reg_size; /* first word when 64-bit. */
26062 info->lr_save_offset = 2*reg_size;
26063 break;
26065 case ABI_V4:
26066 info->fp_save_offset = -info->fp_size;
26067 info->gp_save_offset = info->fp_save_offset - info->gp_size;
26068 info->cr_save_offset = info->gp_save_offset - info->cr_size;
26070 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26072 /* Align stack so SPE GPR save area is aligned on a
26073 double-word boundary. */
26074 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
26075 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
26076 else
26077 info->spe_padding_size = 0;
26079 info->spe_gp_save_offset = info->cr_save_offset
26080 - info->spe_padding_size
26081 - info->spe_gp_size;
26083 /* Adjust for SPE case. */
26084 info->ehrd_offset = info->spe_gp_save_offset;
26086 else if (TARGET_ALTIVEC_ABI)
26088 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
26090 /* Align stack so vector save area is on a quadword boundary. */
26091 if (info->altivec_size != 0)
26092 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
26094 info->altivec_save_offset = info->vrsave_save_offset
26095 - info->altivec_padding_size
26096 - info->altivec_size;
26098 /* Adjust for AltiVec case. */
26099 info->ehrd_offset = info->altivec_save_offset;
26101 else
26102 info->ehrd_offset = info->cr_save_offset;
26104 info->ehrd_offset -= ehrd_size;
26105 info->lr_save_offset = reg_size;
26108 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
26109 info->save_size = RS6000_ALIGN (info->fp_size
26110 + info->gp_size
26111 + info->altivec_size
26112 + info->altivec_padding_size
26113 + info->spe_gp_size
26114 + info->spe_padding_size
26115 + ehrd_size
26116 + ehcr_size
26117 + info->cr_size
26118 + info->vrsave_size,
26119 save_align);
26121 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
26123 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
26124 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
26126 /* Determine if we need to save the link register. */
26127 if (info->calls_p
26128 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26129 && crtl->profile
26130 && !TARGET_PROFILE_KERNEL)
26131 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
26132 #ifdef TARGET_RELOCATABLE
26133 || (DEFAULT_ABI == ABI_V4
26134 && (TARGET_RELOCATABLE || flag_pic > 1)
26135 && get_pool_size () != 0)
26136 #endif
26137 || rs6000_ra_ever_killed ())
26138 info->lr_save_p = 1;
26140 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26141 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26142 && call_used_regs[STATIC_CHAIN_REGNUM]);
26143 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
26145 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
26146 || !(info->savres_strategy & SAVE_INLINE_FPRS)
26147 || !(info->savres_strategy & SAVE_INLINE_VRS)
26148 || !(info->savres_strategy & REST_INLINE_GPRS)
26149 || !(info->savres_strategy & REST_INLINE_FPRS)
26150 || !(info->savres_strategy & REST_INLINE_VRS))
26151 info->lr_save_p = 1;
26153 if (info->lr_save_p)
26154 df_set_regs_ever_live (LR_REGNO, true);
26156 /* Determine if we need to allocate any stack frame:
26158 For AIX we need to push the stack if a frame pointer is needed
26159 (because the stack might be dynamically adjusted), if we are
26160 debugging, if we make calls, or if the sum of fp_save, gp_save,
26161 and local variables is more than the space needed to save all
26162 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
26163 + 18*8 = 288 (GPR13 reserved).
26165 For V.4 we don't have the stack cushion that AIX uses, but assume
26166 that the debugger can handle stackless frames. */
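/* Where the cushion numbers above come from: 32-bit saves f14-f31
   (18 FPRs * 8 bytes = 144) plus r13-r31 (19 GPRs * 4 bytes = 76),
   giving 220; 64-bit saves f14-f31 plus r14-r31 (18 GPRs * 8 bytes
   = 144), giving 288, with GPR13 reserved.  */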
26168 if (info->calls_p)
26169 info->push_p = 1;
26171 else if (DEFAULT_ABI == ABI_V4)
26172 info->push_p = non_fixed_size != 0;
26174 else if (frame_pointer_needed)
26175 info->push_p = 1;
26177 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
26178 info->push_p = 1;
26180 else
26181 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
26183 return info;
26186 /* Return true if the current function uses any GPRs in 64-bit SIMD
26187 mode. */
26189 static bool
26190 spe_func_has_64bit_regs_p (void)
26192 rtx_insn *insns, *insn;
26194 /* Functions that save and restore all the call-saved registers will
26195 need to save/restore the registers in 64-bits. */
26196 if (crtl->calls_eh_return
26197 || cfun->calls_setjmp
26198 || crtl->has_nonlocal_goto)
26199 return true;
26201 insns = get_insns ();
26203 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
26205 if (INSN_P (insn))
26207 rtx i;
26209 /* FIXME: This should be implemented with attributes...
26211 (set_attr "spe64" "true") ... then,
26212 if (get_spe64(insn)) return true;
26214 It's the only reliable way to do the stuff below. */
26216 i = PATTERN (insn);
26217 if (GET_CODE (i) == SET)
26219 machine_mode mode = GET_MODE (SET_SRC (i));
26221 if (SPE_VECTOR_MODE (mode))
26222 return true;
26223 if (TARGET_E500_DOUBLE
26224 && (mode == DFmode || FLOAT128_2REG_P (mode)))
26225 return true;
26230 return false;
26233 static void
26234 debug_stack_info (rs6000_stack_t *info)
26236 const char *abi_string;
26238 if (! info)
26239 info = rs6000_stack_info ();
26241 fprintf (stderr, "\nStack information for function %s:\n",
26242 ((current_function_decl && DECL_NAME (current_function_decl))
26243 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
26244 : "<unknown>"));
26246 switch (info->abi)
26248 default: abi_string = "Unknown"; break;
26249 case ABI_NONE: abi_string = "NONE"; break;
26250 case ABI_AIX: abi_string = "AIX"; break;
26251 case ABI_ELFv2: abi_string = "ELFv2"; break;
26252 case ABI_DARWIN: abi_string = "Darwin"; break;
26253 case ABI_V4: abi_string = "V.4"; break;
26256 fprintf (stderr, "\tABI = %5s\n", abi_string);
26258 if (TARGET_ALTIVEC_ABI)
26259 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
26261 if (TARGET_SPE_ABI)
26262 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
26264 if (info->first_gp_reg_save != 32)
26265 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
26267 if (info->first_fp_reg_save != 64)
26268 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
26270 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
26271 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
26272 info->first_altivec_reg_save);
26274 if (info->lr_save_p)
26275 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
26277 if (info->cr_save_p)
26278 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
26280 if (info->vrsave_mask)
26281 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
26283 if (info->push_p)
26284 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
26286 if (info->calls_p)
26287 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
26289 if (info->gp_size)
26290 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
26292 if (info->fp_size)
26293 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
26295 if (info->altivec_size)
26296 fprintf (stderr, "\taltivec_save_offset = %5d\n",
26297 info->altivec_save_offset);
26299 if (info->spe_gp_size)
26300 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
26301 info->spe_gp_save_offset);
26303 if (info->vrsave_size)
26304 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
26305 info->vrsave_save_offset);
26307 if (info->lr_save_p)
26308 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
26310 if (info->cr_save_p)
26311 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
26313 if (info->varargs_save_offset)
26314 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
26316 if (info->total_size)
26317 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26318 info->total_size);
26320 if (info->vars_size)
26321 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
26322 info->vars_size);
26324 if (info->parm_size)
26325 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
26327 if (info->fixed_size)
26328 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
26330 if (info->gp_size)
26331 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
26333 if (info->spe_gp_size)
26334 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
26336 if (info->fp_size)
26337 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
26339 if (info->altivec_size)
26340 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
26342 if (info->vrsave_size)
26343 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
26345 if (info->altivec_padding_size)
26346 fprintf (stderr, "\taltivec_padding_size= %5d\n",
26347 info->altivec_padding_size);
26349 if (info->spe_padding_size)
26350 fprintf (stderr, "\tspe_padding_size = %5d\n",
26351 info->spe_padding_size);
26353 if (info->cr_size)
26354 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
26356 if (info->save_size)
26357 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
26359 if (info->reg_size != 4)
26360 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
26362 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
26364 fprintf (stderr, "\n");
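/* Illustrative (abridged, hypothetical) output of the dump above for
   a small 32-bit SVR4 function; the spacing is approximate:

	Stack information for function foo:
		ABI                 =   V.4
		first_gp_reg_save   =    25
		lr_save_p           =     1
		gp_save_offset      =   -28
		lr_save_offset      =     4
		total_size          = 48
		gp_size             =    28
		save_size           =    32
		save-strategy       = 1727
*/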
26367 rtx
26368 rs6000_return_addr (int count, rtx frame)
26370 /* Currently we don't optimize very well between prolog and body
26371 code, and for PIC code the generated code can actually be quite
26372 bad, so don't try to be too clever here. */
26373 if (count != 0
26374 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
26376 cfun->machine->ra_needs_full_frame = 1;
26378 return
26379 gen_rtx_MEM
26380 (Pmode,
26381 memory_address
26382 (Pmode,
26383 plus_constant (Pmode,
26384 copy_to_reg
26385 (gen_rtx_MEM (Pmode,
26386 memory_address (Pmode, frame))),
26387 RETURN_ADDRESS_OFFSET)));
26390 cfun->machine->ra_need_lr = 1;
26391 return get_hard_reg_initial_val (Pmode, LR_REGNO);
26394 /* Say whether a function is a candidate for sibcall handling or not. */
26396 static bool
26397 rs6000_function_ok_for_sibcall (tree decl, tree exp)
26399 tree fntype;
26401 if (decl)
26402 fntype = TREE_TYPE (decl);
26403 else
26404 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
26406 /* We can't do it if the called function has more vector parameters
26407 than the current function; there's nowhere to put the VRsave code. */
26408 if (TARGET_ALTIVEC_ABI
26409 && TARGET_ALTIVEC_VRSAVE
26410 && !(decl && decl == current_function_decl))
26412 function_args_iterator args_iter;
26413 tree type;
26414 int nvreg = 0;
26416 /* Functions with vector parameters are required to have a
26417 prototype, so the argument type info must be available
26418 here. */
26419 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
26420 if (TREE_CODE (type) == VECTOR_TYPE
26421 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26422 nvreg++;
26424 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
26425 if (TREE_CODE (type) == VECTOR_TYPE
26426 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26427 nvreg--;
26429 if (nvreg > 0)
26430 return false;
26433 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
26434 functions, because the callee may have a different TOC pointer from
26435 the caller and there's no way to ensure we restore the TOC when
26436 we return. With the secure-plt SYSV ABI we can't make non-local
26437 calls when -fpic/PIC because the plt call stubs use r30. */
26438 if (DEFAULT_ABI == ABI_DARWIN
26439 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26440 && decl
26441 && !DECL_EXTERNAL (decl)
26442 && !DECL_WEAK (decl)
26443 && (*targetm.binds_local_p) (decl))
26444 || (DEFAULT_ABI == ABI_V4
26445 && (!TARGET_SECURE_PLT
26446 || !flag_pic
26447 || (decl
26448 && (*targetm.binds_local_p) (decl)))))
26450 tree attr_list = TYPE_ATTRIBUTES (fntype);
26452 if (!lookup_attribute ("longcall", attr_list)
26453 || lookup_attribute ("shortcall", attr_list))
26454 return true;
26457 return false;
26460 static int
26461 rs6000_ra_ever_killed (void)
26463 rtx_insn *top;
26464 rtx reg;
26465 rtx_insn *insn;
26467 if (cfun->is_thunk)
26468 return 0;
26470 if (cfun->machine->lr_save_state)
26471 return cfun->machine->lr_save_state - 1;
26473 /* regs_ever_live has LR marked as used if any sibcalls are present,
26474 but this should not force saving and restoring in the
26475 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
26476 clobbers LR, so that is inappropriate. */
26478 /* Also, the prologue can generate a store into LR that
26479 doesn't really count, like this:
26481 move LR->R0
26482 bcl to set PIC register
26483 move LR->R31
26484 move R0->LR
26486 When we're called from the epilogue, we need to avoid counting
26487 this as a store. */
26489 push_topmost_sequence ();
26490 top = get_insns ();
26491 pop_topmost_sequence ();
26492 reg = gen_rtx_REG (Pmode, LR_REGNO);
26494 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
26496 if (INSN_P (insn))
26498 if (CALL_P (insn))
26500 if (!SIBLING_CALL_P (insn))
26501 return 1;
26503 else if (find_regno_note (insn, REG_INC, LR_REGNO))
26504 return 1;
26505 else if (set_of (reg, insn) != NULL_RTX
26506 && !prologue_epilogue_contains (insn))
26507 return 1;
26510 return 0;
26513 /* Emit instructions needed to load the TOC register.
26514 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
26515 a constant pool; or for SVR4 -fpic. */
26517 void
26518 rs6000_emit_load_toc_table (int fromprolog)
26520 rtx dest;
26521 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26523 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
26525 char buf[30];
26526 rtx lab, tmp1, tmp2, got;
26528 lab = gen_label_rtx ();
26529 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
26530 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26531 if (flag_pic == 2)
26533 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26534 need_toc_init = 1;
26536 else
26537 got = rs6000_got_sym ();
26538 tmp1 = tmp2 = dest;
26539 if (!fromprolog)
26541 tmp1 = gen_reg_rtx (Pmode);
26542 tmp2 = gen_reg_rtx (Pmode);
26544 emit_insn (gen_load_toc_v4_PIC_1 (lab));
26545 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
26546 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
26547 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
26549 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
26551 emit_insn (gen_load_toc_v4_pic_si ());
26552 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26554 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
26556 char buf[30];
26557 rtx temp0 = (fromprolog
26558 ? gen_rtx_REG (Pmode, 0)
26559 : gen_reg_rtx (Pmode));
26561 if (fromprolog)
26563 rtx symF, symL;
26565 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26566 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26568 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26569 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26571 emit_insn (gen_load_toc_v4_PIC_1 (symF));
26572 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26573 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
26575 else
26577 rtx tocsym, lab;
26579 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26580 need_toc_init = 1;
26581 lab = gen_label_rtx ();
26582 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
26583 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26584 if (TARGET_LINK_STACK)
26585 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
26586 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
26588 emit_insn (gen_addsi3 (dest, temp0, dest));
26590 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
26592 /* This is for AIX code running in non-PIC ELF32. */
26593 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26595 need_toc_init = 1;
26596 emit_insn (gen_elf_high (dest, realsym));
26597 emit_insn (gen_elf_low (dest, dest, realsym));
26599 else
26601 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26603 if (TARGET_32BIT)
26604 emit_insn (gen_load_toc_aix_si (dest));
26605 else
26606 emit_insn (gen_load_toc_aix_di (dest));
26610 /* Emit instructions to restore the link register after determining where
26611 its value has been stored. */
26613 void
26614 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
26616 rs6000_stack_t *info = rs6000_stack_info ();
26617 rtx operands[2];
26619 operands[0] = source;
26620 operands[1] = scratch;
26622 if (info->lr_save_p)
26624 rtx frame_rtx = stack_pointer_rtx;
26625 HOST_WIDE_INT sp_offset = 0;
26626 rtx tmp;
26628 if (frame_pointer_needed
26629 || cfun->calls_alloca
26630 || info->total_size > 32767)
26632 tmp = gen_frame_mem (Pmode, frame_rtx);
26633 emit_move_insn (operands[1], tmp);
26634 frame_rtx = operands[1];
26636 else if (info->push_p)
26637 sp_offset = info->total_size;
26639 tmp = plus_constant (Pmode, frame_rtx,
26640 info->lr_save_offset + sp_offset);
26641 tmp = gen_frame_mem (Pmode, tmp);
26642 emit_move_insn (tmp, operands[0]);
26644 else
26645 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
26647 /* Freeze lr_save_p. We've just emitted rtl that depends on the
26648 state of lr_save_p so any change from here on would be a bug. In
26649 particular, stop rs6000_ra_ever_killed from considering the SET
26650 of lr we may have added just above. */
26651 cfun->machine->lr_save_state = info->lr_save_p + 1;
26654 static GTY(()) alias_set_type set = -1;
26656 alias_set_type
26657 get_TOC_alias_set (void)
26659 if (set == -1)
26660 set = new_alias_set ();
26661 return set;
26664 /* This returns nonzero if the current function uses the TOC. This is
26665 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
26666 is generated by the ABI_V4 load_toc_* patterns. */
26667 #if TARGET_ELF
26668 static int
26669 uses_TOC (void)
26671 rtx_insn *insn;
26673 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26674 if (INSN_P (insn))
26676 rtx pat = PATTERN (insn);
26677 int i;
26679 if (GET_CODE (pat) == PARALLEL)
26680 for (i = 0; i < XVECLEN (pat, 0); i++)
26682 rtx sub = XVECEXP (pat, 0, i);
26683 if (GET_CODE (sub) == USE)
26685 sub = XEXP (sub, 0);
26686 if (GET_CODE (sub) == UNSPEC
26687 && XINT (sub, 1) == UNSPEC_TOC)
26688 return 1;
26692 return 0;
26694 #endif
26696 rtx
26697 create_TOC_reference (rtx symbol, rtx largetoc_reg)
26699 rtx tocrel, tocreg, hi;
26701 if (TARGET_DEBUG_ADDR)
26703 if (GET_CODE (symbol) == SYMBOL_REF)
26704 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
26705 XSTR (symbol, 0));
26706 else
26708 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
26709 GET_RTX_NAME (GET_CODE (symbol)));
26710 debug_rtx (symbol);
26714 if (!can_create_pseudo_p ())
26715 df_set_regs_ever_live (TOC_REGISTER, true);
26717 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
26718 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
26719 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
26720 return tocrel;
26722 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
26723 if (largetoc_reg != NULL)
26725 emit_move_insn (largetoc_reg, hi);
26726 hi = largetoc_reg;
26728 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
26731 /* Issue assembly directives that create a reference to the given DWARF
26732 FRAME_TABLE_LABEL from the current function section. */
26733 void
26734 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
26736 fprintf (asm_out_file, "\t.ref %s\n",
26737 (* targetm.strip_name_encoding) (frame_table_label));
26740 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
26741 and the change to the stack pointer. */
26743 static void
26744 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
26746 rtvec p;
26747 int i;
26748 rtx regs[3];
26750 i = 0;
26751 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26752 if (hard_frame_needed)
26753 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
26754 if (!(REGNO (fp) == STACK_POINTER_REGNUM
26755 || (hard_frame_needed
26756 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
26757 regs[i++] = fp;
26759 p = rtvec_alloc (i);
26760 while (--i >= 0)
26762 rtx mem = gen_frame_mem (BLKmode, regs[i]);
26763 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
26766 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
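/* In sketch form, the tie emitted above is

     (parallel [(set (mem:BLK (reg 1)) (const_int 0)) ...])

   with one BLKmode frame store per register involved.  It expands to
   no machine code; it only gives the scheduler a dependence between
   frame memory accesses and updates of the stack (or frame) pointer.  */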
26769 /* Emit the correct code for allocating stack space, as insns.
26770 If COPY_REG, make sure a copy of the old stack pointer is left in it.
26771 The generated code may use hard register 0 as a temporary. */
26773 static rtx_insn *
26774 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
26776 rtx_insn *insn;
26777 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26778 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
26779 rtx todec = gen_int_mode (-size, Pmode);
26780 rtx par, set, mem;
26782 if (INTVAL (todec) != -size)
26784 warning (0, "stack frame too large");
26785 emit_insn (gen_trap ());
26786 return 0;
26789 if (crtl->limit_stack)
26791 if (REG_P (stack_limit_rtx)
26792 && REGNO (stack_limit_rtx) > 1
26793 && REGNO (stack_limit_rtx) <= 31)
26795 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
26796 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26797 const0_rtx));
26799 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
26800 && TARGET_32BIT
26801 && DEFAULT_ABI == ABI_V4)
26803 rtx toload = gen_rtx_CONST (VOIDmode,
26804 gen_rtx_PLUS (Pmode,
26805 stack_limit_rtx,
26806 GEN_INT (size)));
26808 emit_insn (gen_elf_high (tmp_reg, toload));
26809 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
26810 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26811 const0_rtx));
26813 else
26814 warning (0, "stack limit expression is not supported");
26817 if (copy_reg)
26819 if (copy_off != 0)
26820 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
26821 else
26822 emit_move_insn (copy_reg, stack_reg);
26825 if (size > 32767)
26827 /* Need a note here so that try_split doesn't get confused. */
26828 if (get_last_insn () == NULL_RTX)
26829 emit_note (NOTE_INSN_DELETED);
26830 insn = emit_move_insn (tmp_reg, todec);
26831 try_split (PATTERN (insn), insn, 0);
26832 todec = tmp_reg;
26835 insn = emit_insn (TARGET_32BIT
26836 ? gen_movsi_update_stack (stack_reg, stack_reg,
26837 todec, stack_reg)
26838 : gen_movdi_di_update_stack (stack_reg, stack_reg,
26839 todec, stack_reg));
26840 /* Since we didn't use gen_frame_mem to generate the MEM, grab
26841 it now and set the alias set/attributes. The above gen_*_update
26842 calls will generate a PARALLEL with the MEM set being the first
26843 operation. */
26844 par = PATTERN (insn);
26845 gcc_assert (GET_CODE (par) == PARALLEL);
26846 set = XVECEXP (par, 0, 0);
26847 gcc_assert (GET_CODE (set) == SET);
26848 mem = SET_DEST (set);
26849 gcc_assert (MEM_P (mem));
26850 MEM_NOTRAP_P (mem) = 1;
26851 set_mem_alias_set (mem, get_frame_alias_set ());
26853 RTX_FRAME_RELATED_P (insn) = 1;
26854 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26855 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
26856 GEN_INT (-size))));
26857 return insn;
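/* For the common small-frame case the update insn above is the
   store-with-update form, conceptually "stwu r1,-SIZE(r1)" on 32-bit
   or "stdu r1,-SIZE(r1)" on 64-bit, allocating the frame and writing
   the back chain in one step; frames larger than 32767 bytes first
   materialize -SIZE in r0 and use the indexed-update form instead.  */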
26860 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
26862 #if PROBE_INTERVAL > 32768
26863 #error Cannot use indexed addressing mode for stack probing
26864 #endif
26866 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
26867 inclusive. These are offsets from the current stack pointer. */
26869 static void
26870 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
26872 /* See if we have a constant small number of probes to generate. If so,
26873 that's the easy case. */
26874 if (first + size <= 32768)
26876 HOST_WIDE_INT i;
26878 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
26879 it exceeds SIZE. If only one probe is needed, this will not
26880 generate any code. Then probe at FIRST + SIZE. */
26881 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
26882 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26883 -(first + i)));
26885 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26886 -(first + size)));
26889 /* Otherwise, do the same as above, but in a loop. Note that we must be
26890 extra careful with variables wrapping around because we might be at
26891 the very top (or the very bottom) of the address space and we have
26892 to be able to handle this case properly; in particular, we use an
26893 equality test for the loop condition. */
26894 else
26896 HOST_WIDE_INT rounded_size;
26897 rtx r12 = gen_rtx_REG (Pmode, 12);
26898 rtx r0 = gen_rtx_REG (Pmode, 0);
26900 /* Sanity check for the addressing mode we're going to use. */
26901 gcc_assert (first <= 32768);
26903 /* Step 1: round SIZE to the previous multiple of the interval. */
26905 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
26908 /* Step 2: compute initial and final value of the loop counter. */
26910 /* TEST_ADDR = SP + FIRST. */
26911 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
26912 -first)));
26914 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
26915 if (rounded_size > 32768)
26917 emit_move_insn (r0, GEN_INT (-rounded_size));
26918 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
26920 else
26921 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
26922 -rounded_size)));
26925 /* Step 3: the loop
26929 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
26930 probe at TEST_ADDR
26932 while (TEST_ADDR != LAST_ADDR)
26934 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
26935 until it is equal to ROUNDED_SIZE. */
26937 if (TARGET_64BIT)
26938 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
26939 else
26940 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
26943 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
26944 that SIZE is equal to ROUNDED_SIZE. */
26946 if (size != rounded_size)
26947 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
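/* A worked example, assuming the usual 4096-byte PROBE_INTERVAL: for
   FIRST = 0 and SIZE = 10000 the constant case above emits probes at
   sp-4096, sp-8192 and sp-10000; a range too large for that case goes
   through the r12/r0 loop instead.  */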
26951 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
26952 absolute addresses. */
26954 const char *
26955 output_probe_stack_range (rtx reg1, rtx reg2)
26957 static int labelno = 0;
26958 char loop_lab[32];
26959 rtx xops[2];
26961 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
26963 /* Loop. */
26964 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
26966 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
26967 xops[0] = reg1;
26968 xops[1] = GEN_INT (-PROBE_INTERVAL);
26969 output_asm_insn ("addi %0,%0,%1", xops);
26971 /* Probe at TEST_ADDR. */
26972 xops[1] = gen_rtx_REG (Pmode, 0);
26973 output_asm_insn ("stw %1,0(%0)", xops);
26975 /* Test if TEST_ADDR == LAST_ADDR. */
26976 xops[1] = reg2;
26977 if (TARGET_64BIT)
26978 output_asm_insn ("cmpd 0,%0,%1", xops);
26979 else
26980 output_asm_insn ("cmpw 0,%0,%1", xops);
26982 /* Branch. */
26983 fputs ("\tbne 0,", asm_out_file);
26984 assemble_name_raw (asm_out_file, loop_lab);
26985 fputc ('\n', asm_out_file);
26987 return "";
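/* The 32-bit loop emitted above looks roughly like this (assuming a
   4096-byte PROBE_INTERVAL and the r12/r0 setup done by
   rs6000_emit_probe_stack_range):

	.LPSRL0:
		addi 12,12,-4096	# TEST_ADDR -= PROBE_INTERVAL
		stw 0,0(12)		# probe the word at TEST_ADDR
		cmpw 0,12,0		# TEST_ADDR == LAST_ADDR ?
		bne 0,.LPSRL0

   64-bit differs only in using cmpd for the comparison.  */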
26990 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26991 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26992 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26993 deduce these equivalences by itself so it wasn't necessary to hold
26994 its hand so much. Don't be tempted to always supply d2_f_d_e with
26995 the actual cfa register, i.e. r31 when we are using a hard frame
26996 pointer. That fails when saving regs off r1, and sched moves the
26997 r31 setup past the reg saves. */
26999 static rtx_insn *
27000 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
27001 rtx reg2, rtx repl2)
27003 rtx repl;
27005 if (REGNO (reg) == STACK_POINTER_REGNUM)
27007 gcc_checking_assert (val == 0);
27008 repl = NULL_RTX;
27010 else
27011 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27012 GEN_INT (val));
27014 rtx pat = PATTERN (insn);
27015 if (!repl && !reg2)
27017 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
27018 if (GET_CODE (pat) == PARALLEL)
27019 for (int i = 0; i < XVECLEN (pat, 0); i++)
27020 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27022 rtx set = XVECEXP (pat, 0, i);
27024 /* If this PARALLEL has been emitted for out-of-line
27025 register save functions, or store multiple, then omit
27026 eh_frame info for any user-defined global regs. If
27027 eh_frame info is supplied, frame unwinding will
27028 restore a user reg. */
27029 if (!REG_P (SET_SRC (set))
27030 || !fixed_reg_p (REGNO (SET_SRC (set))))
27031 RTX_FRAME_RELATED_P (set) = 1;
27033 RTX_FRAME_RELATED_P (insn) = 1;
27034 return insn;
27037 /* We expect that 'pat' is either a SET or a PARALLEL containing
27038 SETs (and possibly other stuff). In a PARALLEL, all the SETs
27039 are important so they all have to be marked RTX_FRAME_RELATED_P.
27040 Call simplify_replace_rtx on the SETs rather than the whole insn
27041 so as to leave the other stuff alone (for example USE of r12). */
27043 if (GET_CODE (pat) == SET)
27045 if (repl)
27046 pat = simplify_replace_rtx (pat, reg, repl);
27047 if (reg2)
27048 pat = simplify_replace_rtx (pat, reg2, repl2);
27050 else if (GET_CODE (pat) == PARALLEL)
27052 pat = shallow_copy_rtx (pat);
27053 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
27055 for (int i = 0; i < XVECLEN (pat, 0); i++)
27056 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
27058 rtx set = XVECEXP (pat, 0, i);
27060 if (repl)
27061 set = simplify_replace_rtx (set, reg, repl);
27062 if (reg2)
27063 set = simplify_replace_rtx (set, reg2, repl2);
27064 XVECEXP (pat, 0, i) = set;
27066 /* Omit eh_frame info for any user-defined global regs. */
27067 if (!REG_P (SET_SRC (set))
27068 || !fixed_reg_p (REGNO (SET_SRC (set))))
27069 RTX_FRAME_RELATED_P (set) = 1;
27072 else
27073 gcc_unreachable ();
27075 RTX_FRAME_RELATED_P (insn) = 1;
27076 if (repl || reg2)
27077 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
27079 return insn;
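/* An example of the substitution above (hypothetical values): if the
   prologue sets r12 = r1 + 32768 and then saves f14 with a store to
   8(r12), calling rs6000_frame_related with REG = r12 and VAL = 32768
   attaches a REG_FRAME_RELATED_EXPR note describing the store as one
   to sp + 32768 + 8, so the unwinder sees an offset from the stack
   pointer rather than from the dying temporary.  */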
27082 /* Returns an insn that has a vrsave set operation with the
27083 appropriate CLOBBERs. */
27085 static rtx
27086 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
27088 int nclobs, i;
27089 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
27090 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27092 clobs[0]
27093 = gen_rtx_SET (vrsave,
27094 gen_rtx_UNSPEC_VOLATILE (SImode,
27095 gen_rtvec (2, reg, vrsave),
27096 UNSPECV_SET_VRSAVE));
27098 nclobs = 1;
27100 /* We need to clobber the registers in the mask so the scheduler
27101 does not move sets to VRSAVE before sets of AltiVec registers.
27103 However, if the function receives nonlocal gotos, reload will set
27104 all call saved registers live. We will end up with:
27106 (set (reg 999) (mem))
27107 (parallel [ (set (reg vrsave) (unspec blah))
27108 (clobber (reg 999))])
27110 The clobber will cause the store into reg 999 to be dead, and
27111 flow will attempt to delete an epilogue insn. In this case, we
27112 need an unspec use/set of the register. */
27114 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27115 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27117 if (!epiloguep || call_used_regs [i])
27118 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
27119 gen_rtx_REG (V4SImode, i));
27120 else
27122 rtx reg = gen_rtx_REG (V4SImode, i);
27124 clobs[nclobs++]
27125 = gen_rtx_SET (reg,
27126 gen_rtx_UNSPEC (V4SImode,
27127 gen_rtvec (1, reg), 27));
27131 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
27133 for (i = 0; i < nclobs; ++i)
27134 XVECEXP (insn, 0, i) = clobs[i];
27136 return insn;
27139 static rtx
27140 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
27142 rtx addr, mem;
27144 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
27145 mem = gen_frame_mem (GET_MODE (reg), addr);
27146 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
27149 static rtx
27150 gen_frame_load (rtx reg, rtx frame_reg, int offset)
27152 return gen_frame_set (reg, frame_reg, offset, false);
27155 static rtx
27156 gen_frame_store (rtx reg, rtx frame_reg, int offset)
27158 return gen_frame_set (reg, frame_reg, offset, true);
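/* A sketch of what the helpers above build: on a 64-bit target,
   gen_frame_store (gen_rtx_REG (DImode, 31), stack_pointer_rtx, 8)
   yields

     (set (mem:DI (plus:DI (reg:DI 1) (const_int 8)))
	  (reg:DI 31))

   with the mem placed in the frame alias set by gen_frame_mem;
   gen_frame_load is the same SET with the operands swapped.  */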
27161 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
27162 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
27164 static rtx_insn *
27165 emit_frame_save (rtx frame_reg, machine_mode mode,
27166 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
27168 rtx reg;
27170 /* Some cases that need register indexed addressing. */
27171 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
27172 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
27173 || (TARGET_E500_DOUBLE && mode == DFmode)
27174 || (TARGET_SPE_ABI
27175 && SPE_VECTOR_MODE (mode)
27176 && !SPE_CONST_OFFSET_OK (offset))));
27178 reg = gen_rtx_REG (mode, regno);
27179 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
27180 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
27181 NULL_RTX, NULL_RTX);
27184 /* Emit an offset memory reference suitable for a frame store, while
27185 converting to a valid addressing mode. */
27187 static rtx
27188 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
27190 rtx int_rtx, offset_rtx;
27192 int_rtx = GEN_INT (offset);
27194 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
27195 || (TARGET_E500_DOUBLE && mode == DFmode))
27197 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
27198 emit_move_insn (offset_rtx, int_rtx);
27200 else
27201 offset_rtx = int_rtx;
27203 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
27206 #ifndef TARGET_FIX_AND_CONTINUE
27207 #define TARGET_FIX_AND_CONTINUE 0
27208 #endif
27210 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
27211 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
27212 #define LAST_SAVRES_REGISTER 31
27213 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
27215 enum {
27216 SAVRES_LR = 0x1,
27217 SAVRES_SAVE = 0x2,
27218 SAVRES_REG = 0x0c,
27219 SAVRES_GPR = 0,
27220 SAVRES_FPR = 4,
27221 SAVRES_VR = 8
27224 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
27226 /* Temporary holding space for an out-of-line register save/restore
27227 routine name. */
27228 static char savres_routine_name[30];
27230 /* Return the name for an out-of-line register save/restore routine.
27231 SEL encodes the register class (GPR/FPR/VR), save vs. restore, and whether LR is handled. */
27233 static char *
27234 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
27236 const char *prefix = "";
27237 const char *suffix = "";
27239 /* Different targets are supposed to define
27240 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
27241 routine name could be defined with:
27243 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
27245 This is a nice idea in theory, but in reality, things are
27246 complicated in several ways:
27248 - ELF targets have save/restore routines for GPRs.
27250 - SPE targets use different prefixes for 32/64-bit registers, and
27251 neither of them fits neatly in the FOO_{PREFIX,SUFFIX} regimen.
27253 - PPC64 ELF targets have routines for save/restore of GPRs that
27254 differ in what they do with the link register, so having a set
27255 prefix doesn't work. (We only use one of the save routines at
27256 the moment, though.)
27258 - PPC32 ELF targets have "exit" versions of the restore routines
27259 that restore the link register and can save some extra space.
27260 These require an extra suffix. (There are also "tail" versions
27261 of the restore routines and "GOT" versions of the save routines,
27262 but we don't generate those at present. Same problems apply,
27263 though.)
27265 We deal with all this by synthesizing our own prefix/suffix and
27266 using that for the simple sprintf call shown above. */
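/* For example, on PPC32 ELF this yields "_savegpr_29" for a plain GPR
   save from r29 and "_restfpr_14_x" for an FPR restore from f14 that
   also restores LR, while 64-bit ELF and AIX use "_savegpr0_29" or
   "_savegpr1_29" depending on whether the routine handles LR.  */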
27267 if (TARGET_SPE)
27269 /* No floating point saves on the SPE. */
27270 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
27272 if ((sel & SAVRES_SAVE))
27273 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
27274 else
27275 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
27277 if ((sel & SAVRES_LR))
27278 suffix = "_x";
27280 else if (DEFAULT_ABI == ABI_V4)
27282 if (TARGET_64BIT)
27283 goto aix_names;
27285 if ((sel & SAVRES_REG) == SAVRES_GPR)
27286 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
27287 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27288 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
27289 else if ((sel & SAVRES_REG) == SAVRES_VR)
27290 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27291 else
27292 abort ();
27294 if ((sel & SAVRES_LR))
27295 suffix = "_x";
27297 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27299 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
27300 /* No out-of-line save/restore routines for GPRs on AIX. */
27301 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
27302 #endif
27304 aix_names:
27305 if ((sel & SAVRES_REG) == SAVRES_GPR)
27306 prefix = ((sel & SAVRES_SAVE)
27307 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
27308 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
27309 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27311 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27312 if ((sel & SAVRES_LR))
27313 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
27314 else
27315 #endif
27317 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
27318 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
27321 else if ((sel & SAVRES_REG) == SAVRES_VR)
27322 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
27323 else
27324 abort ();
27327 if (DEFAULT_ABI == ABI_DARWIN)
27329 /* The Darwin approach is (slightly) different, in order to be
27330 compatible with code generated by the system toolchain. There is a
27331 single symbol for the start of the save sequence, and the code here
27332 adds an offset to that symbol based on the first register
27333 to be saved. */
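/* E.g. a GPR save starting at r18 gives "*saveGPR+20 ; save r18-r31",
   where +20 == (18 - 13) * 4 is the byte offset of the r18 entry
   point within the combined save routine.  */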
27334 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
27335 if ((sel & SAVRES_REG) == SAVRES_GPR)
27336 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
27337 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
27338 (regno - 13) * 4, prefix, regno);
27339 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27340 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
27341 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
27342 else if ((sel & SAVRES_REG) == SAVRES_VR)
27343 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
27344 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
27345 else
27346 abort ();
27348 else
27349 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
27351 return savres_routine_name;
27354 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
27355 SEL selects the routine, as for rs6000_savres_routine_name. */
27357 static rtx
27358 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
27360 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
27361 ? info->first_gp_reg_save
27362 : (sel & SAVRES_REG) == SAVRES_FPR
27363 ? info->first_fp_reg_save - 32
27364 : (sel & SAVRES_REG) == SAVRES_VR
27365 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
27366 : -1);
27367 rtx sym;
27368 int select = sel;
27370 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
27371 versions of the gpr routines. */
27372 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
27373 && info->spe_64bit_regs_used)
27374 select ^= SAVRES_FPR ^ SAVRES_GPR;
27376 /* Don't generate bogus routine names. */
27377 gcc_assert (FIRST_SAVRES_REGISTER <= regno
27378 && regno <= LAST_SAVRES_REGISTER
27379 && select >= 0 && select <= 12);
27381 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
27383 if (sym == NULL)
27385 char *name;
27387 name = rs6000_savres_routine_name (info, regno, sel);
27389 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
27390 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
27391 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
27394 return sym;
27397 /* Emit a sequence of insns, including a stack tie if needed, for
27398 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
27399 reset the stack pointer, but move the base of the frame into
27400 reg UPDT_REGNO for use by out-of-line register restore routines. */
27402 static rtx
27403 rs6000_emit_stack_reset (rs6000_stack_t *info,
27404 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
27405 unsigned updt_regno)
27407 rtx updt_reg_rtx;
27409 /* This blockage is needed so that sched doesn't decide to move
27410 the sp change before the register restores. */
27411 if (DEFAULT_ABI == ABI_V4
27412 || (TARGET_SPE_ABI
27413 && info->spe_64bit_regs_used != 0
27414 && info->first_gp_reg_save != 32))
27415 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
27417 /* If we are restoring registers out-of-line, we will be using the
27418 "exit" variants of the restore routines, which will reset the
27419 stack for us. But we do need to point updt_reg into the
27420 right place for those routines. */
27421 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
27423 if (frame_off != 0)
27424 return emit_insn (gen_add3_insn (updt_reg_rtx,
27425 frame_reg_rtx, GEN_INT (frame_off)));
27426 else if (REGNO (frame_reg_rtx) != updt_regno)
27427 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
27429 return NULL_RTX;
27432 /* Return the register number used as a pointer by out-of-line
27433 save/restore functions. */
27435 static inline unsigned
27436 ptr_regno_for_savres (int sel)
27438 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27439 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
27440 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
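/* That is: on AIX and ELFv2 the FPR routines, and any routine that
   also handles LR, address the save area from r1 (the stack pointer)
   and the rest from r12; V.4 and Darwin use r11, except the Darwin
   FPR routines, which use r1 as well.  */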
27443 /* Construct a parallel rtx describing the effect of a call to an
27444 out-of-line register save/restore routine, and emit the insn
27445 or jump_insn as appropriate. */
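/* For example, a V.4 GPR "exit" restore from r29 builds roughly

     (parallel [(return)
		(clobber (reg:SI LR_REGNO))
		(use (symbol_ref "_restgpr_29_x"))
		(use (reg:SI 11))
		(set (reg:SI 29) (mem (plus (reg:SI 11) (const_int c))))
		...
		(set (reg:SI 31) (mem (plus (reg:SI 11) (const_int c'))))])

   where the leading (return) appears only for LR-restoring variants;
   for VR routines the single (use) of the pointer reg becomes a
   clobber plus a (use) of r0, and save variants that handle LR append
   a store of r0 into the LR save slot.  */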
27447 static rtx_insn *
27448 rs6000_emit_savres_rtx (rs6000_stack_t *info,
27449 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
27450 machine_mode reg_mode, int sel)
27452 int i;
27453 int offset, start_reg, end_reg, n_regs, use_reg;
27454 int reg_size = GET_MODE_SIZE (reg_mode);
27455 rtx sym;
27456 rtvec p;
27457 rtx par;
27458 rtx_insn *insn;
27460 offset = 0;
27461 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27462 ? info->first_gp_reg_save
27463 : (sel & SAVRES_REG) == SAVRES_FPR
27464 ? info->first_fp_reg_save
27465 : (sel & SAVRES_REG) == SAVRES_VR
27466 ? info->first_altivec_reg_save
27467 : -1);
27468 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27469 ? 32
27470 : (sel & SAVRES_REG) == SAVRES_FPR
27471 ? 64
27472 : (sel & SAVRES_REG) == SAVRES_VR
27473 ? LAST_ALTIVEC_REGNO + 1
27474 : -1);
27475 n_regs = end_reg - start_reg;
27476 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
27477 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
27478 + n_regs);
27480 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27481 RTVEC_ELT (p, offset++) = ret_rtx;
27483 RTVEC_ELT (p, offset++)
27484 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27486 sym = rs6000_savres_routine_sym (info, sel);
27487 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
27489 use_reg = ptr_regno_for_savres (sel);
27490 if ((sel & SAVRES_REG) == SAVRES_VR)
27492 /* Vector regs are saved/restored using [reg+reg] addressing. */
27493 RTVEC_ELT (p, offset++)
27494 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27495 RTVEC_ELT (p, offset++)
27496 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
27498 else
27499 RTVEC_ELT (p, offset++)
27500 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27502 for (i = 0; i < end_reg - start_reg; i++)
27503 RTVEC_ELT (p, i + offset)
27504 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
27505 frame_reg_rtx, save_area_offset + reg_size * i,
27506 (sel & SAVRES_SAVE) != 0);
27508 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27509 RTVEC_ELT (p, i + offset)
27510 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
27512 par = gen_rtx_PARALLEL (VOIDmode, p);
27514 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27516 insn = emit_jump_insn (par);
27517 JUMP_LABEL (insn) = ret_rtx;
27519 else
27520 insn = emit_insn (par);
27521 return insn;
27524 /* Emit code to store CR fields that need to be saved into REG. */
27526 static void
27527 rs6000_emit_move_from_cr (rtx reg)
27529 /* Only the ELFv2 ABI allows saving just the CR fields that actually need saving. */
27530 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
27532 int i, cr_reg[8], count = 0;
27534 /* Collect CR fields that must be saved. */
27535 for (i = 0; i < 8; i++)
27536 if (save_reg_p (CR0_REGNO + i))
27537 cr_reg[count++] = i;
27539 /* If it's just a single one, use mfcrf. */
27540 if (count == 1)
27542 rtvec p = rtvec_alloc (1);
27543 rtvec r = rtvec_alloc (2);
27544 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
27545 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
27546 RTVEC_ELT (p, 0)
27547 = gen_rtx_SET (reg,
27548 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
27550 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27551 return;
27554 /* ??? It might be better to handle the count == 2 or 3 cases here
27555 as well, using logical operations to combine the values. */
27558 emit_insn (gen_movesi_from_cr (reg));
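/* E.g. a lone CR2 is fetched with the single-field form
   ("mfocrf rD,0x20", since 1 << (7 - 2) == 0x20), while the
   gen_movesi_from_cr fallback above emits a full "mfcr rD".  */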
27561 /* Return whether the split-stack arg pointer (r12) is used. */
27563 static bool
27564 split_stack_arg_pointer_used_p (void)
27566 /* If the pseudo holding the arg pointer is no longer a pseudo,
27567 then the arg pointer is used. */
27568 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
27569 && (!REG_P (cfun->machine->split_stack_arg_pointer)
27570 || (REGNO (cfun->machine->split_stack_arg_pointer)
27571 < FIRST_PSEUDO_REGISTER)))
27572 return true;
27574 /* Unfortunately we also need to do some code scanning, since
27575 r12 may have been substituted for the pseudo. */
27576 rtx_insn *insn;
27577 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
27578 FOR_BB_INSNS (bb, insn)
27579 if (NONDEBUG_INSN_P (insn))
27581 /* A call destroys r12. */
27582 if (CALL_P (insn))
27583 return false;
27585 df_ref use;
27586 FOR_EACH_INSN_USE (use, insn)
27588 rtx x = DF_REF_REG (use);
27589 if (REG_P (x) && REGNO (x) == 12)
27590 return true;
27592 df_ref def;
27593 FOR_EACH_INSN_DEF (def, insn)
27595 rtx x = DF_REF_REG (def);
27596 if (REG_P (x) && REGNO (x) == 12)
27597 return false;
27600 return bitmap_bit_p (DF_LR_OUT (bb), 12);
27603 /* Return whether we need to emit an ELFv2 global entry point prologue. */
27605 static bool
27606 rs6000_global_entry_point_needed_p (void)
27608 /* Only needed for the ELFv2 ABI. */
27609 if (DEFAULT_ABI != ABI_ELFv2)
27610 return false;
27612 /* With -msingle-pic-base, we assume the whole program shares the same
27613 TOC, so no global entry point prologues are needed anywhere. */
27614 if (TARGET_SINGLE_PIC_BASE)
27615 return false;
27617 /* Ensure we have a global entry point for thunks. ??? We could
27618 avoid that if the target routine doesn't need a global entry point,
27619 but we do not know whether this is the case at this point. */
27620 if (cfun->is_thunk)
27621 return true;
27623 /* For regular functions, rs6000_emit_prologue sets this flag if the
27624 routine ever uses the TOC pointer. */
27625 return cfun->machine->r2_setup_needed;
27628 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
27629 static sbitmap
27630 rs6000_get_separate_components (void)
27632 rs6000_stack_t *info = rs6000_stack_info ();
27634 if (WORLD_SAVE_P (info))
27635 return NULL;
27637 if (TARGET_SPE_ABI)
27638 return NULL;
27640 sbitmap components = sbitmap_alloc (32);
27641 bitmap_clear (components);
27643 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
27644 && !(info->savres_strategy & REST_MULTIPLE));
27646 /* The GPRs we need saved to the frame. */
27647 if ((info->savres_strategy & SAVE_INLINE_GPRS)
27648 && (info->savres_strategy & REST_INLINE_GPRS))
27650 int reg_size = TARGET_32BIT ? 4 : 8;
27651 int offset = info->gp_save_offset;
27652 if (info->push_p)
27653 offset += info->total_size;
27655 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27657 if (IN_RANGE (offset, -0x8000, 0x7fff)
27658 && rs6000_reg_live_or_pic_offset_p (regno))
27659 bitmap_set_bit (components, regno);
27661 offset += reg_size;
27665 /* Don't mess with the hard frame pointer. */
27666 if (frame_pointer_needed)
27667 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
27669 /* Don't mess with the fixed TOC register. */
27670 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
27671 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27672 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27673 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
27675 /* Optimize LR save and restore if we can. This is component 0. Any
27676 out-of-line register save/restore routines need LR. */
27677 if (info->lr_save_p
27678 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27679 && (info->savres_strategy & SAVE_INLINE_GPRS)
27680 && (info->savres_strategy & REST_INLINE_GPRS)
27681 && (info->savres_strategy & SAVE_INLINE_FPRS)
27682 && (info->savres_strategy & REST_INLINE_FPRS)
27683 && (info->savres_strategy & SAVE_INLINE_VRS)
27684 && (info->savres_strategy & REST_INLINE_VRS))
27686 int offset = info->lr_save_offset;
27687 if (info->push_p)
27688 offset += info->total_size;
27689 if (IN_RANGE (offset, -0x8000, 0x7fff))
27690 bitmap_set_bit (components, 0);
27693 return components;
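/* In the returned sbitmap, bit 0 represents the LR component (r0 is
   never a separately wrapped GPR, so that bit is free for this), and
   bits info->first_gp_reg_save..31 represent the corresponding GPR
   saves; FPRs and VRs are never shrink-wrapped separately here.  */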
27696 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
27697 static sbitmap
27698 rs6000_components_for_bb (basic_block bb)
27700 rs6000_stack_t *info = rs6000_stack_info ();
27702 bitmap in = DF_LIVE_IN (bb);
27703 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
27704 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
27706 sbitmap components = sbitmap_alloc (32);
27707 bitmap_clear (components);
27709 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
27710 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
27711 if (bitmap_bit_p (in, regno)
27712 || bitmap_bit_p (gen, regno)
27713 || bitmap_bit_p (kill, regno))
27714 bitmap_set_bit (components, regno);
27716 /* LR needs to be saved around a bb if it is killed in that bb. */
27717 if (bitmap_bit_p (gen, LR_REGNO)
27718 || bitmap_bit_p (kill, LR_REGNO))
27719 bitmap_set_bit (components, 0);
27721 return components;
27724 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
27725 static void
27726 rs6000_disqualify_components (sbitmap components, edge e,
27727 sbitmap edge_components, bool /*is_prologue*/)
27729 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
27730 live where we want to place that code. */
27731 if (bitmap_bit_p (edge_components, 0)
27732 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
27734 if (dump_file)
27735 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
27736 "on entry to bb %d\n", e->dest->index);
27737 bitmap_clear_bit (components, 0);
27741 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
27742 static void
27743 rs6000_emit_prologue_components (sbitmap components)
27745 rs6000_stack_t *info = rs6000_stack_info ();
27746 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27747 ? HARD_FRAME_POINTER_REGNUM
27748 : STACK_POINTER_REGNUM);
27749 int reg_size = TARGET_32BIT ? 4 : 8;
27751 /* Prologue for LR. */
27752 if (bitmap_bit_p (components, 0))
27754 rtx reg = gen_rtx_REG (Pmode, 0);
27755 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27756 RTX_FRAME_RELATED_P (insn) = 1;
27757 add_reg_note (insn, REG_CFA_REGISTER, NULL);
27759 int offset = info->lr_save_offset;
27760 if (info->push_p)
27761 offset += info->total_size;
27763 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27764 RTX_FRAME_RELATED_P (insn) = 1;
27765 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27766 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
27767 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
27770 /* Prologue for the GPRs. */
27771 int offset = info->gp_save_offset;
27772 if (info->push_p)
27773 offset += info->total_size;
27775 for (int i = info->first_gp_reg_save; i < 32; i++)
27777 if (bitmap_bit_p (components, i))
27779 rtx reg = gen_rtx_REG (Pmode, i);
27780 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
27781 RTX_FRAME_RELATED_P (insn) = 1;
27782 rtx set = copy_rtx (single_set (insn));
27783 add_reg_note (insn, REG_CFA_OFFSET, set);
27786 offset += reg_size;
27790 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
27791 static void
27792 rs6000_emit_epilogue_components (sbitmap components)
27794 rs6000_stack_t *info = rs6000_stack_info ();
27795 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
27796 ? HARD_FRAME_POINTER_REGNUM
27797 : STACK_POINTER_REGNUM);
27798 int reg_size = TARGET_32BIT ? 4 : 8;
27800 /* Epilogue for the GPRs. */
27801 int offset = info->gp_save_offset;
27802 if (info->push_p)
27803 offset += info->total_size;
27805 for (int i = info->first_gp_reg_save; i < 32; i++)
27807 if (bitmap_bit_p (components, i))
27809 rtx reg = gen_rtx_REG (Pmode, i);
27810 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27811 RTX_FRAME_RELATED_P (insn) = 1;
27812 add_reg_note (insn, REG_CFA_RESTORE, reg);
27815 offset += reg_size;
27818 /* Epilogue for LR. */
27819 if (bitmap_bit_p (components, 0))
27821 int offset = info->lr_save_offset;
27822 if (info->push_p)
27823 offset += info->total_size;
27825 rtx reg = gen_rtx_REG (Pmode, 0);
27826 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
27828 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27829 insn = emit_move_insn (lr, reg);
27830 RTX_FRAME_RELATED_P (insn) = 1;
27831 add_reg_note (insn, REG_CFA_RESTORE, lr);
27835 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
27836 static void
27837 rs6000_set_handled_components (sbitmap components)
27839 rs6000_stack_t *info = rs6000_stack_info ();
27841 for (int i = info->first_gp_reg_save; i < 32; i++)
27842 if (bitmap_bit_p (components, i))
27843 cfun->machine->gpr_is_wrapped_separately[i] = true;
27845 if (bitmap_bit_p (components, 0))
27846 cfun->machine->lr_is_wrapped_separately = true;
27849 /* Emit function prologue as insns. */
27851 void
27852 rs6000_emit_prologue (void)
27854 rs6000_stack_t *info = rs6000_stack_info ();
27855 machine_mode reg_mode = Pmode;
27856 int reg_size = TARGET_32BIT ? 4 : 8;
27857 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27858 rtx frame_reg_rtx = sp_reg_rtx;
27859 unsigned int cr_save_regno;
27860 rtx cr_save_rtx = NULL_RTX;
27861 rtx_insn *insn;
27862 int strategy;
27863 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27864 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27865 && call_used_regs[STATIC_CHAIN_REGNUM]);
27866 int using_split_stack = (flag_split_stack
27867 && (lookup_attribute ("no_split_stack",
27868 DECL_ATTRIBUTES (cfun->decl))
27869 == NULL));
27871 /* Offset to top of frame for frame_reg and sp respectively. */
27872 HOST_WIDE_INT frame_off = 0;
27873 HOST_WIDE_INT sp_off = 0;
27874 /* sp_adjust is the stack adjusting instruction, tracked so that the
27875 insn setting up the split-stack arg pointer can be emitted just
27876 prior to it, when r12 is not used here for other purposes. */
27877 rtx_insn *sp_adjust = 0;
27879 #if CHECKING_P
27880 /* Track and check usage of r0, r11, r12. */
27881 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27882 #define START_USE(R) do \
27884 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27885 reg_inuse |= 1 << (R); \
27886 } while (0)
27887 #define END_USE(R) do \
27889 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27890 reg_inuse &= ~(1 << (R)); \
27891 } while (0)
27892 #define NOT_INUSE(R) do \
27894 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27895 } while (0)
27896 #else
27897 #define START_USE(R) do {} while (0)
27898 #define END_USE(R) do {} while (0)
27899 #define NOT_INUSE(R) do {} while (0)
27900 #endif
27902 if (DEFAULT_ABI == ABI_ELFv2
27903 && !TARGET_SINGLE_PIC_BASE)
27905 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27907 /* With -mminimal-toc we may generate an extra use of r2 below. */
27908 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27909 cfun->machine->r2_setup_needed = true;
27913 if (flag_stack_usage_info)
27914 current_function_static_stack_size = info->total_size;
27916 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27918 HOST_WIDE_INT size = info->total_size;
27920 if (crtl->is_leaf && !cfun->calls_alloca)
27922 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27923 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
27924 size - STACK_CHECK_PROTECT);
27926 else if (size > 0)
27927 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
27930 if (TARGET_FIX_AND_CONTINUE)
27932 /* gdb on darwin arranges to forward a function from the old
27933 address by modifying the first 5 instructions of the function
27934 to branch to the overriding function. This is necessary to
27935 permit function pointers that point to the old function to
27936 actually forward to the new function. */
27937 emit_insn (gen_nop ());
27938 emit_insn (gen_nop ());
27939 emit_insn (gen_nop ());
27940 emit_insn (gen_nop ());
27941 emit_insn (gen_nop ());
27944 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27946 reg_mode = V2SImode;
27947 reg_size = 8;
27950 /* Handle world saves specially here. */
27951 if (WORLD_SAVE_P (info))
27953 int i, j, sz;
27954 rtx treg;
27955 rtvec p;
27956 rtx reg0;
27958 /* save_world expects lr in r0. */
27959 reg0 = gen_rtx_REG (Pmode, 0);
27960 if (info->lr_save_p)
27962 insn = emit_move_insn (reg0,
27963 gen_rtx_REG (Pmode, LR_REGNO));
27964 RTX_FRAME_RELATED_P (insn) = 1;
27967 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27968 assumptions about the offsets of various bits of the stack
27969 frame. */
27970 gcc_assert (info->gp_save_offset == -220
27971 && info->fp_save_offset == -144
27972 && info->lr_save_offset == 8
27973 && info->cr_save_offset == 4
27974 && info->push_p
27975 && info->lr_save_p
27976 && (!crtl->calls_eh_return
27977 || info->ehrd_offset == -432)
27978 && info->vrsave_save_offset == -224
27979 && info->altivec_save_offset == -416);
27981 treg = gen_rtx_REG (SImode, 11);
27982 emit_move_insn (treg, GEN_INT (-info->total_size));
27984 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27985 in R11. It also clobbers R12, so beware! */
27988 /* Preserve CR2 for save_world prologues. */
27988 sz = 5;
27989 sz += 32 - info->first_gp_reg_save;
27990 sz += 64 - info->first_fp_reg_save;
27991 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27992 p = rtvec_alloc (sz);
27993 j = 0;
27994 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27995 gen_rtx_REG (SImode,
27996 LR_REGNO));
27997 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27998 gen_rtx_SYMBOL_REF (Pmode,
27999 "*save_world"));
28000 /* We do floats first so that the instruction pattern matches
28001 properly. */
28002 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28003 RTVEC_ELT (p, j++)
28004 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28005 ? DFmode : SFmode,
28006 info->first_fp_reg_save + i),
28007 frame_reg_rtx,
28008 info->fp_save_offset + frame_off + 8 * i);
28009 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28010 RTVEC_ELT (p, j++)
28011 = gen_frame_store (gen_rtx_REG (V4SImode,
28012 info->first_altivec_reg_save + i),
28013 frame_reg_rtx,
28014 info->altivec_save_offset + frame_off + 16 * i);
28015 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28016 RTVEC_ELT (p, j++)
28017 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28018 frame_reg_rtx,
28019 info->gp_save_offset + frame_off + reg_size * i);
28021 /* CR register traditionally saved as CR2. */
28022 RTVEC_ELT (p, j++)
28023 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
28024 frame_reg_rtx, info->cr_save_offset + frame_off);
28025 /* Explain about use of R0. */
28026 if (info->lr_save_p)
28027 RTVEC_ELT (p, j++)
28028 = gen_frame_store (reg0,
28029 frame_reg_rtx, info->lr_save_offset + frame_off);
28030 /* Explain what happens to the stack pointer. */
28032 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
28033 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
28036 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28037 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28038 treg, GEN_INT (-info->total_size));
28039 sp_off = frame_off = info->total_size;
28042 strategy = info->savres_strategy;
28044 /* For V.4, update stack before we do any saving and set back pointer. */
28045 if (! WORLD_SAVE_P (info)
28046 && info->push_p
28047 && (DEFAULT_ABI == ABI_V4
28048 || crtl->calls_eh_return))
28050 bool need_r11 = (TARGET_SPE
28051 ? (!(strategy & SAVE_INLINE_GPRS)
28052 && info->spe_64bit_regs_used == 0)
28053 : (!(strategy & SAVE_INLINE_FPRS)
28054 || !(strategy & SAVE_INLINE_GPRS)
28055 || !(strategy & SAVE_INLINE_VRS)));
28056 int ptr_regno = -1;
28057 rtx ptr_reg = NULL_RTX;
28058 int ptr_off = 0;
28060 if (info->total_size < 32767)
28061 frame_off = info->total_size;
28062 else if (need_r11)
28063 ptr_regno = 11;
28064 else if (info->cr_save_p
28065 || info->lr_save_p
28066 || info->first_fp_reg_save < 64
28067 || info->first_gp_reg_save < 32
28068 || info->altivec_size != 0
28069 || info->vrsave_size != 0
28070 || crtl->calls_eh_return)
28071 ptr_regno = 12;
28072 else
28074 /* The prologue won't be saving any regs so there is no need
28075 to set up a frame register to access any frame save area.
28076 We also won't be using frame_off anywhere below, but set
28077 the correct value anyway to protect against future
28078 changes to this function. */
28079 frame_off = info->total_size;
28081 if (ptr_regno != -1)
28083 /* Set up the frame offset to that needed by the first
28084 out-of-line save function. */
28085 START_USE (ptr_regno);
28086 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28087 frame_reg_rtx = ptr_reg;
28088 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
28089 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
28090 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
28091 ptr_off = info->gp_save_offset + info->gp_size;
28092 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
28093 ptr_off = info->altivec_save_offset + info->altivec_size;
28094 frame_off = -ptr_off;
28096 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28097 ptr_reg, ptr_off);
28098 if (REGNO (frame_reg_rtx) == 12)
28099 sp_adjust = 0;
28100 sp_off = info->total_size;
28101 if (frame_reg_rtx != sp_reg_rtx)
28102 rs6000_emit_stack_tie (frame_reg_rtx, false);
28105 /* If we use the link register, get it into r0. */
28106 if (!WORLD_SAVE_P (info) && info->lr_save_p
28107 && !cfun->machine->lr_is_wrapped_separately)
28109 rtx addr, reg, mem;
28111 reg = gen_rtx_REG (Pmode, 0);
28112 START_USE (0);
28113 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
28114 RTX_FRAME_RELATED_P (insn) = 1;
28116 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
28117 | SAVE_NOINLINE_FPRS_SAVES_LR)))
28119 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28120 GEN_INT (info->lr_save_offset + frame_off));
28121 mem = gen_rtx_MEM (Pmode, addr);
28122 /* This should not be of rs6000_sr_alias_set, because of
28123 __builtin_return_address. */
28125 insn = emit_move_insn (mem, reg);
28126 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28127 NULL_RTX, NULL_RTX);
28128 END_USE (0);
28132 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
28133 r12 will be needed by an out-of-line gpr save. */
28134 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28135 && !(strategy & (SAVE_INLINE_GPRS
28136 | SAVE_NOINLINE_GPRS_SAVES_LR))
28137 ? 11 : 12);
28138 if (!WORLD_SAVE_P (info)
28139 && info->cr_save_p
28140 && REGNO (frame_reg_rtx) != cr_save_regno
28141 && !(using_static_chain_p && cr_save_regno == 11)
28142 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
28144 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
28145 START_USE (cr_save_regno);
28146 rs6000_emit_move_from_cr (cr_save_rtx);
28149 /* Do any required saving of FPRs. If only a few need saving, do
28150 it inline; otherwise, call an out-of-line save function. */
28151 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
28153 int i;
28154 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28155 if (save_reg_p (info->first_fp_reg_save + i))
28156 emit_frame_save (frame_reg_rtx,
28157 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28158 ? DFmode : SFmode),
28159 info->first_fp_reg_save + i,
28160 info->fp_save_offset + frame_off + 8 * i,
28161 sp_off - frame_off);
28163 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
28165 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28166 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28167 unsigned ptr_regno = ptr_regno_for_savres (sel);
28168 rtx ptr_reg = frame_reg_rtx;
28170 if (REGNO (frame_reg_rtx) == ptr_regno)
28171 gcc_checking_assert (frame_off == 0);
28172 else
28174 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28175 NOT_INUSE (ptr_regno);
28176 emit_insn (gen_add3_insn (ptr_reg,
28177 frame_reg_rtx, GEN_INT (frame_off)));
28179 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28180 info->fp_save_offset,
28181 info->lr_save_offset,
28182 DFmode, sel);
28183 rs6000_frame_related (insn, ptr_reg, sp_off,
28184 NULL_RTX, NULL_RTX);
28185 if (lr)
28186 END_USE (0);
28189 /* Save GPRs. This is done as a PARALLEL if we are using
28190 the store-multiple instructions. */
28191 if (!WORLD_SAVE_P (info)
28192 && TARGET_SPE_ABI
28193 && info->spe_64bit_regs_used != 0
28194 && info->first_gp_reg_save != 32)
28196 int i;
28197 rtx spe_save_area_ptr;
28198 HOST_WIDE_INT save_off;
28199 int ool_adjust = 0;
28201 /* Determine whether we can address all of the registers that need
28202 to be saved with an offset from frame_reg_rtx that fits in
28203 the small const field for SPE memory instructions. */
28204 int spe_regs_addressable
28205 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28206 + reg_size * (32 - info->first_gp_reg_save - 1))
28207 && (strategy & SAVE_INLINE_GPRS));
28209 if (spe_regs_addressable)
28211 spe_save_area_ptr = frame_reg_rtx;
28212 save_off = frame_off;
28214 else
28216 /* Make r11 point to the start of the SPE save area. We need
28217 to be careful here if r11 is holding the static chain. If
28218 it is, then temporarily save it in r0. */
28219 HOST_WIDE_INT offset;
28221 if (!(strategy & SAVE_INLINE_GPRS))
28222 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28223 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
28224 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
28225 save_off = frame_off - offset;
28227 if (using_static_chain_p)
28229 rtx r0 = gen_rtx_REG (Pmode, 0);
28231 START_USE (0);
28232 gcc_assert (info->first_gp_reg_save > 11);
28234 emit_move_insn (r0, spe_save_area_ptr);
28236 else if (REGNO (frame_reg_rtx) != 11)
28237 START_USE (11);
28239 emit_insn (gen_addsi3 (spe_save_area_ptr,
28240 frame_reg_rtx, GEN_INT (offset)));
28241 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
28242 frame_off = -info->spe_gp_save_offset + ool_adjust;
28245 if ((strategy & SAVE_INLINE_GPRS))
28247 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28248 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28249 emit_frame_save (spe_save_area_ptr, reg_mode,
28250 info->first_gp_reg_save + i,
28251 (info->spe_gp_save_offset + save_off
28252 + reg_size * i),
28253 sp_off - save_off);
28255 else
28257 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
28258 info->spe_gp_save_offset + save_off,
28259 0, reg_mode,
28260 SAVRES_SAVE | SAVRES_GPR);
28262 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
28263 NULL_RTX, NULL_RTX);
28266 /* Move the static chain pointer back. */
28267 if (!spe_regs_addressable)
28269 if (using_static_chain_p)
28271 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
28272 END_USE (0);
28274 else if (REGNO (frame_reg_rtx) != 11)
28275 END_USE (11);
28278 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
28280 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
28281 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
28282 unsigned ptr_regno = ptr_regno_for_savres (sel);
28283 rtx ptr_reg = frame_reg_rtx;
28284 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
28285 int end_save = info->gp_save_offset + info->gp_size;
28286 int ptr_off;
28288 if (ptr_regno == 12)
28289 sp_adjust = 0;
28290 if (!ptr_set_up)
28291 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28293 /* Need to adjust r11 (r12) if we saved any FPRs. */
28294 if (end_save + frame_off != 0)
28296 rtx offset = GEN_INT (end_save + frame_off);
28298 if (ptr_set_up)
28299 frame_off = -end_save;
28300 else
28301 NOT_INUSE (ptr_regno);
28302 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28304 else if (!ptr_set_up)
28306 NOT_INUSE (ptr_regno);
28307 emit_move_insn (ptr_reg, frame_reg_rtx);
28309 ptr_off = -end_save;
28310 insn = rs6000_emit_savres_rtx (info, ptr_reg,
28311 info->gp_save_offset + ptr_off,
28312 info->lr_save_offset + ptr_off,
28313 reg_mode, sel);
28314 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
28315 NULL_RTX, NULL_RTX);
28316 if (lr)
28317 END_USE (0);
28319 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
28321 rtvec p;
28322 int i;
28323 p = rtvec_alloc (32 - info->first_gp_reg_save);
28324 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28325 RTVEC_ELT (p, i)
28326 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28327 frame_reg_rtx,
28328 info->gp_save_offset + frame_off + reg_size * i);
28329 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28330 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28331 NULL_RTX, NULL_RTX);
28333 else if (!WORLD_SAVE_P (info))
28335 int offset = info->gp_save_offset + frame_off;
28336 for (int i = info->first_gp_reg_save; i < 32; i++)
28338 if (rs6000_reg_live_or_pic_offset_p (i)
28339 && !cfun->machine->gpr_is_wrapped_separately[i])
28340 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
28341 sp_off - frame_off);
28343 offset += reg_size;
28347 if (crtl->calls_eh_return)
28349 unsigned int i;
28350 rtvec p;
28352 for (i = 0; ; ++i)
28354 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28355 if (regno == INVALID_REGNUM)
28356 break;
28359 p = rtvec_alloc (i);
28361 for (i = 0; ; ++i)
28363 unsigned int regno = EH_RETURN_DATA_REGNO (i);
28364 if (regno == INVALID_REGNUM)
28365 break;
28367 rtx set
28368 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
28369 sp_reg_rtx,
28370 info->ehrd_offset + sp_off + reg_size * (int) i);
28371 RTVEC_ELT (p, i) = set;
28372 RTX_FRAME_RELATED_P (set) = 1;
28375 insn = emit_insn (gen_blockage ());
28376 RTX_FRAME_RELATED_P (insn) = 1;
28377 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
28380 /* In AIX ABI we need to make sure r2 is really saved. */
28381 if (TARGET_AIX && crtl->calls_eh_return)
28383 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
28384 rtx join_insn, note;
28385 rtx_insn *save_insn;
28386 long toc_restore_insn;
28388 tmp_reg = gen_rtx_REG (Pmode, 11);
28389 tmp_reg_si = gen_rtx_REG (SImode, 11);
28390 if (using_static_chain_p)
28392 START_USE (0);
28393 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
28395 else
28396 START_USE (11);
28397 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
28398 /* Peek at instruction to which this function returns. If it's
28399 restoring r2, then we know we've already saved r2. We can't
28400 unconditionally save r2 because the value we have will already
28401 be updated if we arrived at this function via a plt call or
28402 toc adjusting stub. */
28403 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
28404 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
28405 + RS6000_TOC_SAVE_SLOT);
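	 /* For example, with RS6000_TOC_SAVE_SLOT == 40 (64-bit ELFv1)
	    this yields 0xE8410028, the image of "ld 2,40(1)"; the
	    32-bit value 0x80410014 likewise encodes "lwz 2,20(1)".  */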
28406 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
28407 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
28408 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
28409 validate_condition_mode (EQ, CCUNSmode);
28410 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
28411 emit_insn (gen_rtx_SET (compare_result,
28412 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
28413 toc_save_done = gen_label_rtx ();
28414 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28415 gen_rtx_EQ (VOIDmode, compare_result,
28416 const0_rtx),
28417 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
28418 pc_rtx);
28419 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28420 JUMP_LABEL (jump) = toc_save_done;
28421 LABEL_NUSES (toc_save_done) += 1;
28423 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
28424 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
28425 sp_off - frame_off);
28427 emit_label (toc_save_done);
28429 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
28430 have a CFG that has different saves along different paths.
28431 Move the note to a dummy blockage insn, which describes that
28432 R2 is unconditionally saved after the label. */
28433 /* ??? An alternate representation might be a special insn pattern
28434 containing both the branch and the store. That might give the
28435 code that minimizes the number of DW_CFA_advance opcodes more
28436 freedom in placing the annotations. */
28437 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
28438 if (note)
28439 remove_note (save_insn, note);
28440 else
28441 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
28442 copy_rtx (PATTERN (save_insn)), NULL_RTX);
28443 RTX_FRAME_RELATED_P (save_insn) = 0;
28445 join_insn = emit_insn (gen_blockage ());
28446 REG_NOTES (join_insn) = note;
28447 RTX_FRAME_RELATED_P (join_insn) = 1;
28449 if (using_static_chain_p)
28451 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
28452 END_USE (0);
28454 else
28455 END_USE (11);
28458 /* Save CR if we use any that must be preserved. */
28459 if (!WORLD_SAVE_P (info) && info->cr_save_p)
28461 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
28462 GEN_INT (info->cr_save_offset + frame_off));
28463 rtx mem = gen_frame_mem (SImode, addr);
28465 /* If we didn't copy cr before, do so now using r0. */
28466 if (cr_save_rtx == NULL_RTX)
28468 START_USE (0);
28469 cr_save_rtx = gen_rtx_REG (SImode, 0);
28470 rs6000_emit_move_from_cr (cr_save_rtx);
28473 /* Saving CR requires a two-instruction sequence: one instruction
28474 to move the CR to a general-purpose register, and a second
28475 instruction that stores the GPR to memory.
28477 We do not emit any DWARF CFI records for the first of these,
28478 because we cannot properly represent the fact that CR is saved in
28479 a register. One reason is that we cannot express that multiple
28480 CR fields are saved; another reason is that on 64-bit, the size
28481 of the CR register in DWARF (4 bytes) differs from the size of
28482 a general-purpose register.
28484 This means if any intervening instruction were to clobber one of
28485 the call-saved CR fields, we'd have incorrect CFI. To prevent
28486 this from happening, we mark the store to memory as a use of
28487 those CR fields, which prevents any such instruction from being
28488 scheduled in between the two instructions. */
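  /* Schematically, the protected sequence is

	 mfcr  rN          # move the whole CR into a GPR
	 stw   rN,off(rB)  # store it into the frame

     with the store below wrapped in a PARALLEL together with
     (use (reg:CC CRn)) for each saved field.  */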
28489 rtx crsave_v[9];
28490 int n_crsave = 0;
28491 int i;
28493 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
28494 for (i = 0; i < 8; i++)
28495 if (save_reg_p (CR0_REGNO + i))
28496 crsave_v[n_crsave++]
28497 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28499 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
28500 gen_rtvec_v (n_crsave, crsave_v)));
28501 END_USE (REGNO (cr_save_rtx));
28503 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
28504 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
28505 so we need to construct a frame expression manually. */
28506 RTX_FRAME_RELATED_P (insn) = 1;
28508 /* Update address to be stack-pointer relative, like
28509 rs6000_frame_related would do. */
28510 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28511 GEN_INT (info->cr_save_offset + sp_off));
28512 mem = gen_frame_mem (SImode, addr);
28514 if (DEFAULT_ABI == ABI_ELFv2)
28516 /* In the ELFv2 ABI we generate separate CFI records for each
28517 CR field that was actually saved. They all point to the
28518 same 32-bit stack slot. */
28519 rtx crframe[8];
28520 int n_crframe = 0;
28522 for (i = 0; i < 8; i++)
28523 if (save_reg_p (CR0_REGNO + i))
28525 crframe[n_crframe]
28526 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
28528 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
28529 n_crframe++;
28532 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28533 gen_rtx_PARALLEL (VOIDmode,
28534 gen_rtvec_v (n_crframe, crframe)));
28536 else
28538 /* In other ABIs, by convention, we use a single CR regnum to
28539 represent the fact that all call-saved CR fields are saved.
28540 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
28541 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
28542 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
28546 /* In the ELFv2 ABI we need to save all call-saved CR fields into
28547 *separate* slots if the routine calls __builtin_eh_return, so
28548 that they can be independently restored by the unwinder. */
28549 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28551 int i, cr_off = info->ehcr_offset;
28552 rtx crsave;
28554 /* ??? We might get better performance by using multiple mfocrf
28555 instructions. */
28556 crsave = gen_rtx_REG (SImode, 0);
28557 emit_insn (gen_movesi_from_cr (crsave));
28559 for (i = 0; i < 8; i++)
28560 if (!call_used_regs[CR0_REGNO + i])
28562 rtvec p = rtvec_alloc (2);
28563 RTVEC_ELT (p, 0)
28564 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
28565 RTVEC_ELT (p, 1)
28566 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28568 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28570 RTX_FRAME_RELATED_P (insn) = 1;
28571 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28572 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
28573 sp_reg_rtx, cr_off + sp_off));
28575 cr_off += reg_size;
28579 /* Update stack and set back pointer unless this is V.4,
28580 for which it was done previously. */
28581 if (!WORLD_SAVE_P (info) && info->push_p
28582 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
28584 rtx ptr_reg = NULL;
28585 int ptr_off = 0;
28587 /* If saving altivec regs we need to be able to address all save
28588 locations using a 16-bit offset. */
28589 if ((strategy & SAVE_INLINE_VRS) == 0
28590 || (info->altivec_size != 0
28591 && (info->altivec_save_offset + info->altivec_size - 16
28592 + info->total_size - frame_off) > 32767)
28593 || (info->vrsave_size != 0
28594 && (info->vrsave_save_offset
28595 + info->total_size - frame_off) > 32767))
28597 int sel = SAVRES_SAVE | SAVRES_VR;
28598 unsigned ptr_regno = ptr_regno_for_savres (sel);
28600 if (using_static_chain_p
28601 && ptr_regno == STATIC_CHAIN_REGNUM)
28602 ptr_regno = 12;
28603 if (REGNO (frame_reg_rtx) != ptr_regno)
28604 START_USE (ptr_regno);
28605 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28606 frame_reg_rtx = ptr_reg;
28607 ptr_off = info->altivec_save_offset + info->altivec_size;
28608 frame_off = -ptr_off;
28610 else if (REGNO (frame_reg_rtx) == 1)
28611 frame_off = info->total_size;
28612 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28613 ptr_reg, ptr_off);
28614 if (REGNO (frame_reg_rtx) == 12)
28615 sp_adjust = 0;
28616 sp_off = info->total_size;
28617 if (frame_reg_rtx != sp_reg_rtx)
28618 rs6000_emit_stack_tie (frame_reg_rtx, false);
28621 /* Set frame pointer, if needed. */
28622 if (frame_pointer_needed)
28624 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
28625 sp_reg_rtx);
28626 RTX_FRAME_RELATED_P (insn) = 1;
28629 /* Save AltiVec registers if needed. Save here because the red zone does
28630 not always include AltiVec registers. */
28631 if (!WORLD_SAVE_P (info)
28632 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
28634 int end_save = info->altivec_save_offset + info->altivec_size;
28635 int ptr_off;
28636 /* Oddly, the vector save/restore functions point r0 at the end
28637 of the save area, then use r11 or r12 to load offsets for
28638 [reg+reg] addressing. */
28639 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28640 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
28641 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28643 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28644 NOT_INUSE (0);
28645 if (scratch_regno == 12)
28646 sp_adjust = 0;
28647 if (end_save + frame_off != 0)
28649 rtx offset = GEN_INT (end_save + frame_off);
28651 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28653 else
28654 emit_move_insn (ptr_reg, frame_reg_rtx);
28656 ptr_off = -end_save;
28657 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28658 info->altivec_save_offset + ptr_off,
28659 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
28660 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
28661 NULL_RTX, NULL_RTX);
28662 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28664 /* The oddity mentioned above clobbered our frame reg. */
28665 emit_move_insn (frame_reg_rtx, ptr_reg);
28666 frame_off = ptr_off;
28669 else if (!WORLD_SAVE_P (info)
28670 && info->altivec_size != 0)
28672 int i;
28674 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28675 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28677 rtx areg, savereg, mem;
28678 HOST_WIDE_INT offset;
28680 offset = (info->altivec_save_offset + frame_off
28681 + 16 * (i - info->first_altivec_reg_save));
28683 savereg = gen_rtx_REG (V4SImode, i);
28685 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28687 mem = gen_frame_mem (V4SImode,
28688 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28689 GEN_INT (offset)));
28690 insn = emit_insn (gen_rtx_SET (mem, savereg));
28691 areg = NULL_RTX;
28693 else
28695 NOT_INUSE (0);
28696 areg = gen_rtx_REG (Pmode, 0);
28697 emit_move_insn (areg, GEN_INT (offset));
28699 /* AltiVec addressing mode is [reg+reg]. */
28700 mem = gen_frame_mem (V4SImode,
28701 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
28703 /* Rather than emitting a generic move, force use of the stvx
28704 instruction, which we always want on ISA 2.07 (power8) systems.
28705 In particular we don't want xxpermdi/stxvd2x for little
28706 endian. */
28707 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
28710 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28711 areg, GEN_INT (offset));
28715 /* VRSAVE is a bit vector representing which AltiVec registers
28716 are used. The OS uses this to determine which vector
28717 registers to save on a context switch. We need to save
28718 VRSAVE on the stack frame, add whatever AltiVec registers we
28719 used in this function, and do the corresponding magic in the
28720 epilogue. */
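/* Roughly, the code below performs

     reg = VRSAVE;                         -- mfspr (SPR 256)
     frame[vrsave_save_offset] = reg;      -- save the old mask
     reg |= info->vrsave_mask;             -- add the regs we use
     VRSAVE = reg;                         -- mtspr
*/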
28722 if (!WORLD_SAVE_P (info)
28723 && info->vrsave_size != 0)
28725 rtx reg, vrsave;
28726 int offset;
28727 int save_regno;
28729 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
28730 be using r12 as frame_reg_rtx and r11 as the static chain
28731 pointer for nested functions. */
28732 save_regno = 12;
28733 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28734 && !using_static_chain_p)
28735 save_regno = 11;
28736 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
28738 save_regno = 11;
28739 if (using_static_chain_p)
28740 save_regno = 0;
28743 NOT_INUSE (save_regno);
28744 reg = gen_rtx_REG (SImode, save_regno);
28745 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28746 if (TARGET_MACHO)
28747 emit_insn (gen_get_vrsave_internal (reg));
28748 else
28749 emit_insn (gen_rtx_SET (reg, vrsave));
28751 /* Save VRSAVE. */
28752 offset = info->vrsave_save_offset + frame_off;
28753 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
28755 /* Include the registers in the mask. */
28756 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
28758 insn = emit_insn (generate_set_vrsave (reg, info, 0));
28761 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
28762 if (!TARGET_SINGLE_PIC_BASE
28763 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
28764 || (DEFAULT_ABI == ABI_V4
28765 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
28766 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
28768 /* If emit_load_toc_table will use the link register, we need to save
28769 it. We use R12 for this purpose because emit_load_toc_table
28770 can use register 0. This allows us to use a plain 'blr' to return
28771 from the procedure more often. */
28772 int save_LR_around_toc_setup = (TARGET_ELF
28773 && DEFAULT_ABI == ABI_V4
28774 && flag_pic
28775 && ! info->lr_save_p
28776 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
28777 if (save_LR_around_toc_setup)
28779 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28780 rtx tmp = gen_rtx_REG (Pmode, 12);
28782 sp_adjust = 0;
28783 insn = emit_move_insn (tmp, lr);
28784 RTX_FRAME_RELATED_P (insn) = 1;
28786 rs6000_emit_load_toc_table (TRUE);
28788 insn = emit_move_insn (lr, tmp);
28789 add_reg_note (insn, REG_CFA_RESTORE, lr);
28790 RTX_FRAME_RELATED_P (insn) = 1;
28792 else
28793 rs6000_emit_load_toc_table (TRUE);
28796 #if TARGET_MACHO
28797 if (!TARGET_SINGLE_PIC_BASE
28798 && DEFAULT_ABI == ABI_DARWIN
28799 && flag_pic && crtl->uses_pic_offset_table)
28801 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28802 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
28804 /* Save and restore LR locally around this call (in R0). */
28805 if (!info->lr_save_p)
28806 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
28808 emit_insn (gen_load_macho_picbase (src));
28810 emit_move_insn (gen_rtx_REG (Pmode,
28811 RS6000_PIC_OFFSET_TABLE_REGNUM),
28812 lr);
28814 if (!info->lr_save_p)
28815 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
28817 #endif
28819 /* If we need to, save the TOC register after doing the stack setup.
28820 Do not emit eh frame info for this save. The unwinder wants info,
28821 conceptually attached to instructions in this function, about
28822 register values in the caller of this function. This R2 may have
28823 already been changed from the value in the caller.
28824 We don't attempt to write accurate DWARF EH frame info for R2
28825 because code emitted by gcc for a (non-pointer) function call
28826 doesn't save and restore R2. Instead, R2 is managed out-of-line
28827 by a linker generated plt call stub when the function resides in
28828 a shared library. This behavior is costly to describe in DWARF,
28829 both in terms of the size of DWARF info and the time taken in the
28830 unwinder to interpret it. R2 changes, apart from the
28831 calls_eh_return case earlier in this function, are handled by
28832 linux-unwind.h frob_update_context. */
28833 if (rs6000_save_toc_in_prologue_p ())
28835 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
28836 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
28839 if (using_split_stack && split_stack_arg_pointer_used_p ())
28841 /* Set up the arg pointer (r12) for -fsplit-stack code. If
28842 __morestack was called, it left the arg pointer to the old
28843 stack in r29. Otherwise, the arg pointer is the top of the
28844 current frame. */
28845 cfun->machine->split_stack_argp_used = true;
28846 if (sp_adjust)
28848 rtx r12 = gen_rtx_REG (Pmode, 12);
28849 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
28850 emit_insn_before (set_r12, sp_adjust);
28852 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
28854 rtx r12 = gen_rtx_REG (Pmode, 12);
28855 if (frame_off == 0)
28856 emit_move_insn (r12, frame_reg_rtx);
28857 else
28858 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
28860 if (info->push_p)
28862 rtx r12 = gen_rtx_REG (Pmode, 12);
28863 rtx r29 = gen_rtx_REG (Pmode, 29);
28864 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28865 rtx not_more = gen_label_rtx ();
28866 rtx jump;
28868 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28869 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
28870 gen_rtx_LABEL_REF (VOIDmode, not_more),
28871 pc_rtx);
28872 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28873 JUMP_LABEL (jump) = not_more;
28874 LABEL_NUSES (not_more) += 1;
28875 emit_move_insn (r12, r29);
28876 emit_label (not_more);
28881 /* Output .extern statements for the save/restore routines we use. */
28883 static void
28884 rs6000_output_savres_externs (FILE *file)
28886 rs6000_stack_t *info = rs6000_stack_info ();
28888 if (TARGET_DEBUG_STACK)
28889 debug_stack_info (info);
28891 /* Write .extern for any function we will call to save and restore
28892 fp values. */
28893 if (info->first_fp_reg_save < 64
28894 && !TARGET_MACHO
28895 && !TARGET_ELF)
28897 char *name;
28898 int regno = info->first_fp_reg_save - 32;
28900 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
28902 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28903 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28904 name = rs6000_savres_routine_name (info, regno, sel);
28905 fprintf (file, "\t.extern %s\n", name);
28907 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
28909 bool lr = (info->savres_strategy
28910 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28911 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28912 name = rs6000_savres_routine_name (info, regno, sel);
28913 fprintf (file, "\t.extern %s\n", name);
28918 /* Write function prologue. */
28920 static void
28921 rs6000_output_function_prologue (FILE *file,
28922 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28924 if (!cfun->is_thunk)
28925 rs6000_output_savres_externs (file);
28927 /* ELFv2 ABI r2 setup code and local entry point. This must follow
28928 immediately after the global entry point label. */
28929 if (rs6000_global_entry_point_needed_p ())
28931 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28933 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
28935 if (TARGET_CMODEL != CMODEL_LARGE)
28937 /* In the small and medium code models, we assume the TOC is less
28938 than 2 GB away from the text section, so it can be computed via the
28939 following two-instruction sequence. */
28940 char buf[256];
28942 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28943 fprintf (file, "0:\taddis 2,12,.TOC.-");
28944 assemble_name (file, buf);
28945 fprintf (file, "@ha\n");
28946 fprintf (file, "\taddi 2,2,.TOC.-");
28947 assemble_name (file, buf);
28948 fprintf (file, "@l\n");
28950 else
28952 /* In the large code model, we allow arbitrary offsets between the
28953 TOC and the text section, so we have to load the offset from
28954 memory. The data field is emitted directly before the global
28955 entry point in rs6000_elf_declare_function_name. */
28956 char buf[256];
28958 #ifdef HAVE_AS_ENTRY_MARKERS
28959 /* If supported by the linker, emit a marker relocation. If the
28960 total code size of the final executable or shared library
28961 happens to fit into 2 GB after all, the linker will replace
28962 this code sequence with the sequence for the small or medium
28963 code model. */
28964 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
28965 #endif
28966 fprintf (file, "\tld 2,");
28967 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28968 assemble_name (file, buf);
28969 fprintf (file, "-");
28970 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28971 assemble_name (file, buf);
28972 fprintf (file, "(12)\n");
28973 fprintf (file, "\tadd 2,2,12\n");
28976 fputs ("\t.localentry\t", file);
28977 assemble_name (file, name);
28978 fputs (",.-", file);
28979 assemble_name (file, name);
28980 fputs ("\n", file);
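/* For illustration only (not emitted): for a function "foo" in the
   small or medium code model, the code written above is expected to
   look like

	foo:
	0:	addis 2,12,.TOC.-.LCF0@ha
		addi 2,2,.TOC.-.LCF0@l
		.localentry foo,.-foo

   with the label number varying per function; in the large code model
   the addis/addi pair is replaced by a load of the .LCL-.LCF offset
   word followed by "add 2,2,12".  */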
28983 /* Output -mprofile-kernel code. This needs to be done here instead of
28984 in output_function_profile since it must go after the ELFv2 ABI
28985 local entry point. */
28986 if (TARGET_PROFILE_KERNEL && crtl->profile)
28988 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28989 gcc_assert (!TARGET_32BIT);
28991 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28993 /* In the ELFv2 ABI we have no compiler stack word. It must be
28994 the responsibility of _mcount to preserve the static chain
28995 register if required. */
28996 if (DEFAULT_ABI != ABI_ELFv2
28997 && cfun->static_chain_decl != NULL)
28999 asm_fprintf (file, "\tstd %s,24(%s)\n",
29000 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29001 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
29002 asm_fprintf (file, "\tld %s,24(%s)\n",
29003 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
29005 else
29006 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
29009 rs6000_pic_labelno++;
29012 /* -mprofile-kernel code calls mcount before the function prologue,
29013 so a profiled leaf function should stay a leaf function. */
29014 static bool
29015 rs6000_keep_leaf_when_profiled ()
29017 return TARGET_PROFILE_KERNEL;
29020 /* Non-zero if vmx regs are restored before the frame pop, zero if
29021 we restore after the pop when possible. */
29022 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
29024 /* Restoring cr is a two-step process: loading a reg from the frame
29025 save, then moving the reg to cr. For ABI_V4 we must let the
29026 unwinder know that the stack location is no longer valid at or
29027 before the stack deallocation, but we can't emit a cfa_restore for
29028 cr at the stack deallocation like we do for other registers.
29029 The trouble is that it is possible for the move to cr to be
29030 scheduled after the stack deallocation. So say exactly where cr
29031 is located on each of the two insns. */
29033 static rtx
29034 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
29036 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
29037 rtx reg = gen_rtx_REG (SImode, regno);
29038 rtx_insn *insn = emit_move_insn (reg, mem);
29040 if (!exit_func && DEFAULT_ABI == ABI_V4)
29042 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29043 rtx set = gen_rtx_SET (reg, cr);
29045 add_reg_note (insn, REG_CFA_REGISTER, set);
29046 RTX_FRAME_RELATED_P (insn) = 1;
29048 return reg;
29051 /* Reload CR from REG. */
29053 static void
29054 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
29056 int count = 0;
29057 int i;
29059 if (using_mfcr_multiple)
29061 for (i = 0; i < 8; i++)
29062 if (save_reg_p (CR0_REGNO + i))
29063 count++;
29064 gcc_assert (count);
29067 if (using_mfcr_multiple && count > 1)
29069 rtx_insn *insn;
29070 rtvec p;
29071 int ndx;
29073 p = rtvec_alloc (count);
29075 ndx = 0;
29076 for (i = 0; i < 8; i++)
29077 if (save_reg_p (CR0_REGNO + i))
29079 rtvec r = rtvec_alloc (2);
29080 RTVEC_ELT (r, 0) = reg;
29081 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
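/* Illustrative note: the mask here follows the mtcrf convention,
   selecting CR field I via bit 7 - I, so e.g. restoring cr2 uses
   the mask 0x20.  */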
29082 RTVEC_ELT (p, ndx) =
29083 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
29084 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
29085 ndx++;
29087 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29088 gcc_assert (ndx == count);
29090 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29091 CR field separately. */
29092 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29094 for (i = 0; i < 8; i++)
29095 if (save_reg_p (CR0_REGNO + i))
29096 add_reg_note (insn, REG_CFA_RESTORE,
29097 gen_rtx_REG (SImode, CR0_REGNO + i));
29099 RTX_FRAME_RELATED_P (insn) = 1;
29102 else
29103 for (i = 0; i < 8; i++)
29104 if (save_reg_p (CR0_REGNO + i))
29106 rtx insn = emit_insn (gen_movsi_to_cr_one
29107 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29109 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
29110 CR field separately, attached to the insn that in fact
29111 restores this particular CR field. */
29112 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
29114 add_reg_note (insn, REG_CFA_RESTORE,
29115 gen_rtx_REG (SImode, CR0_REGNO + i));
29117 RTX_FRAME_RELATED_P (insn) = 1;
29121 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
29122 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
29123 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29125 rtx_insn *insn = get_last_insn ();
29126 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
29128 add_reg_note (insn, REG_CFA_RESTORE, cr);
29129 RTX_FRAME_RELATED_P (insn) = 1;
29133 /* Like cr, the move to lr instruction can be scheduled after the
29134 stack deallocation, but unlike cr, its stack frame save is still
29135 valid. So we only need to emit the cfa_restore on the correct
29136 instruction. */
29138 static void
29139 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
29141 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
29142 rtx reg = gen_rtx_REG (Pmode, regno);
29144 emit_move_insn (reg, mem);
29147 static void
29148 restore_saved_lr (int regno, bool exit_func)
29150 rtx reg = gen_rtx_REG (Pmode, regno);
29151 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29152 rtx_insn *insn = emit_move_insn (lr, reg);
29154 if (!exit_func && flag_shrink_wrap)
29156 add_reg_note (insn, REG_CFA_RESTORE, lr);
29157 RTX_FRAME_RELATED_P (insn) = 1;
29161 static rtx
29162 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
29164 if (DEFAULT_ABI == ABI_ELFv2)
29166 int i;
29167 for (i = 0; i < 8; i++)
29168 if (save_reg_p (CR0_REGNO + i))
29170 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
29171 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
29172 cfa_restores);
29175 else if (info->cr_save_p)
29176 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29177 gen_rtx_REG (SImode, CR2_REGNO),
29178 cfa_restores);
29180 if (info->lr_save_p)
29181 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29182 gen_rtx_REG (Pmode, LR_REGNO),
29183 cfa_restores);
29184 return cfa_restores;
29187 /* Return true if OFFSET from stack pointer can be clobbered by signals.
29188 V.4 doesn't have any stack cushion; the AIX ABIs leave 220 or 288 bytes
29189 below the stack pointer that are not clobbered by signals. */
29191 static inline bool
29192 offset_below_red_zone_p (HOST_WIDE_INT offset)
29194 return offset < (DEFAULT_ABI == ABI_V4
29195 ? 0
29196 : TARGET_32BIT ? -220 : -288);
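/* For example, on 64-bit AIX or ELF an offset of -288 is still inside
   the 288-byte red zone and so protected, while -289 and below may be
   clobbered; under the V.4 ABI any negative offset is fair game for
   signal handlers.  */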
29199 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
29201 static void
29202 emit_cfa_restores (rtx cfa_restores)
29204 rtx_insn *insn = get_last_insn ();
29205 rtx *loc = &REG_NOTES (insn);
29207 while (*loc)
29208 loc = &XEXP (*loc, 1);
29209 *loc = cfa_restores;
29210 RTX_FRAME_RELATED_P (insn) = 1;
29213 /* Emit function epilogue as insns. */
29215 void
29216 rs6000_emit_epilogue (int sibcall)
29218 rs6000_stack_t *info;
29219 int restoring_GPRs_inline;
29220 int restoring_FPRs_inline;
29221 int using_load_multiple;
29222 int using_mtcr_multiple;
29223 int use_backchain_to_restore_sp;
29224 int restore_lr;
29225 int strategy;
29226 HOST_WIDE_INT frame_off = 0;
29227 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
29228 rtx frame_reg_rtx = sp_reg_rtx;
29229 rtx cfa_restores = NULL_RTX;
29230 rtx insn;
29231 rtx cr_save_reg = NULL_RTX;
29232 machine_mode reg_mode = Pmode;
29233 int reg_size = TARGET_32BIT ? 4 : 8;
29234 int i;
29235 bool exit_func;
29236 unsigned ptr_regno;
29238 info = rs6000_stack_info ();
29240 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29242 reg_mode = V2SImode;
29243 reg_size = 8;
29246 strategy = info->savres_strategy;
29247 using_load_multiple = strategy & REST_MULTIPLE;
29248 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
29249 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
29250 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
29251 || rs6000_cpu == PROCESSOR_PPC603
29252 || rs6000_cpu == PROCESSOR_PPC750
29253 || optimize_size);
29254 /* Restore via the backchain when we have a large frame, since this
29255 is more efficient than an addis, addi pair. The second condition
29256 here will not trigger at the moment; we don't actually need a
29257 frame pointer for alloca, but the generic parts of the compiler
29258 give us one anyway. */
29259 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
29260 ? info->lr_save_offset
29261 : 0) > 32767
29262 || (cfun->calls_alloca
29263 && !frame_pointer_needed));
29264 restore_lr = (info->lr_save_p
29265 && (restoring_FPRs_inline
29266 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
29267 && (restoring_GPRs_inline
29268 || info->first_fp_reg_save < 64)
29269 && !cfun->machine->lr_is_wrapped_separately);
29272 if (WORLD_SAVE_P (info))
29274 int i, j;
29275 char rname[30];
29276 const char *alloc_rname;
29277 rtvec p;
29279 /* eh_rest_world_r10 will return to the location saved in the LR
29280 stack slot (which is not likely to be our caller).
29281 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
29282 rest_world is similar, except any R10 parameter is ignored.
29283 The exception-handling stuff that was here in 2.95 is no
29284 longer necessary. */
29286 p = rtvec_alloc (9
29287 + 32 - info->first_gp_reg_save
29288 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
29289 + 63 + 1 - info->first_fp_reg_save);
29291 strcpy (rname, ((crtl->calls_eh_return) ?
29292 "*eh_rest_world_r10" : "*rest_world"));
29293 alloc_rname = ggc_strdup (rname);
29295 j = 0;
29296 RTVEC_ELT (p, j++) = ret_rtx;
29297 RTVEC_ELT (p, j++)
29298 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
29299 /* The instruction pattern requires a clobber here;
29300 it is shared with the restVEC helper. */
29301 RTVEC_ELT (p, j++)
29302 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
29305 /* CR register traditionally saved as CR2. */
29306 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
29307 RTVEC_ELT (p, j++)
29308 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
29309 if (flag_shrink_wrap)
29311 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
29312 gen_rtx_REG (Pmode, LR_REGNO),
29313 cfa_restores);
29314 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29318 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29320 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29321 RTVEC_ELT (p, j++)
29322 = gen_frame_load (reg,
29323 frame_reg_rtx, info->gp_save_offset + reg_size * i);
29324 if (flag_shrink_wrap)
29325 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29327 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29329 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
29330 RTVEC_ELT (p, j++)
29331 = gen_frame_load (reg,
29332 frame_reg_rtx, info->altivec_save_offset + 16 * i);
29333 if (flag_shrink_wrap)
29334 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29336 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
29338 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29339 ? DFmode : SFmode),
29340 info->first_fp_reg_save + i);
29341 RTVEC_ELT (p, j++)
29342 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
29343 if (flag_shrink_wrap)
29344 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29346 RTVEC_ELT (p, j++)
29347 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
29348 RTVEC_ELT (p, j++)
29349 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
29350 RTVEC_ELT (p, j++)
29351 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
29352 RTVEC_ELT (p, j++)
29353 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
29354 RTVEC_ELT (p, j++)
29355 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
29356 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29358 if (flag_shrink_wrap)
29360 REG_NOTES (insn) = cfa_restores;
29361 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29362 RTX_FRAME_RELATED_P (insn) = 1;
29364 return;
29367 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
29368 if (info->push_p)
29369 frame_off = info->total_size;
29371 /* Restore AltiVec registers if we must do so before adjusting the
29372 stack. */
29373 if (info->altivec_size != 0
29374 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29375 || (DEFAULT_ABI != ABI_V4
29376 && offset_below_red_zone_p (info->altivec_save_offset))))
29378 int i;
29379 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29381 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
29382 if (use_backchain_to_restore_sp)
29384 int frame_regno = 11;
29386 if ((strategy & REST_INLINE_VRS) == 0)
29388 /* Of r11 and r12, select the one not clobbered by an
29389 out-of-line restore function for the frame register. */
29390 frame_regno = 11 + 12 - scratch_regno;
29392 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
29393 emit_move_insn (frame_reg_rtx,
29394 gen_rtx_MEM (Pmode, sp_reg_rtx));
29395 frame_off = 0;
29397 else if (frame_pointer_needed)
29398 frame_reg_rtx = hard_frame_pointer_rtx;
29400 if ((strategy & REST_INLINE_VRS) == 0)
29402 int end_save = info->altivec_save_offset + info->altivec_size;
29403 int ptr_off;
29404 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29405 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29407 if (end_save + frame_off != 0)
29409 rtx offset = GEN_INT (end_save + frame_off);
29411 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29413 else
29414 emit_move_insn (ptr_reg, frame_reg_rtx);
29416 ptr_off = -end_save;
29417 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29418 info->altivec_save_offset + ptr_off,
29419 0, V4SImode, SAVRES_VR);
29421 else
29423 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29424 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29426 rtx addr, areg, mem, insn;
29427 rtx reg = gen_rtx_REG (V4SImode, i);
29428 HOST_WIDE_INT offset
29429 = (info->altivec_save_offset + frame_off
29430 + 16 * (i - info->first_altivec_reg_save));
29432 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29434 mem = gen_frame_mem (V4SImode,
29435 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29436 GEN_INT (offset)));
29437 insn = gen_rtx_SET (reg, mem);
29439 else
29441 areg = gen_rtx_REG (Pmode, 0);
29442 emit_move_insn (areg, GEN_INT (offset));
29444 /* AltiVec addressing mode is [reg+reg]. */
29445 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29446 mem = gen_frame_mem (V4SImode, addr);
29448 /* Rather than emitting a generic move, force use of the
29449 lvx instruction, which we always want. In particular we
29450 don't want lxvd2x/xxpermdi for little endian. */
29451 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29454 (void) emit_insn (insn);
29458 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29459 if (((strategy & REST_INLINE_VRS) == 0
29460 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29461 && (flag_shrink_wrap
29462 || (offset_below_red_zone_p
29463 (info->altivec_save_offset
29464 + 16 * (i - info->first_altivec_reg_save)))))
29466 rtx reg = gen_rtx_REG (V4SImode, i);
29467 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29471 /* Restore VRSAVE if we must do so before adjusting the stack. */
29472 if (info->vrsave_size != 0
29473 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29474 || (DEFAULT_ABI != ABI_V4
29475 && offset_below_red_zone_p (info->vrsave_save_offset))))
29477 rtx reg;
29479 if (frame_reg_rtx == sp_reg_rtx)
29481 if (use_backchain_to_restore_sp)
29483 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29484 emit_move_insn (frame_reg_rtx,
29485 gen_rtx_MEM (Pmode, sp_reg_rtx));
29486 frame_off = 0;
29488 else if (frame_pointer_needed)
29489 frame_reg_rtx = hard_frame_pointer_rtx;
29492 reg = gen_rtx_REG (SImode, 12);
29493 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29494 info->vrsave_save_offset + frame_off));
29496 emit_insn (generate_set_vrsave (reg, info, 1));
29499 insn = NULL_RTX;
29500 /* If we have a large stack frame, restore the old stack pointer
29501 using the backchain. */
29502 if (use_backchain_to_restore_sp)
29504 if (frame_reg_rtx == sp_reg_rtx)
29506 /* Under V.4, don't reset the stack pointer until after we're done
29507 loading the saved registers. */
29508 if (DEFAULT_ABI == ABI_V4)
29509 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29511 insn = emit_move_insn (frame_reg_rtx,
29512 gen_rtx_MEM (Pmode, sp_reg_rtx));
29513 frame_off = 0;
29515 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29516 && DEFAULT_ABI == ABI_V4)
29517 /* frame_reg_rtx has been set up by the altivec restore. */
29519 else
29521 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
29522 frame_reg_rtx = sp_reg_rtx;
29525 /* If we have a frame pointer, we can restore the old stack pointer
29526 from it. */
29527 else if (frame_pointer_needed)
29529 frame_reg_rtx = sp_reg_rtx;
29530 if (DEFAULT_ABI == ABI_V4)
29531 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29532 /* Prevent reordering memory accesses against stack pointer restore. */
29533 else if (cfun->calls_alloca
29534 || offset_below_red_zone_p (-info->total_size))
29535 rs6000_emit_stack_tie (frame_reg_rtx, true);
29537 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
29538 GEN_INT (info->total_size)));
29539 frame_off = 0;
29541 else if (info->push_p
29542 && DEFAULT_ABI != ABI_V4
29543 && !crtl->calls_eh_return)
29545 /* Prevent reordering memory accesses against stack pointer restore. */
29546 if (cfun->calls_alloca
29547 || offset_below_red_zone_p (-info->total_size))
29548 rs6000_emit_stack_tie (frame_reg_rtx, false);
29549 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
29550 GEN_INT (info->total_size)));
29551 frame_off = 0;
29553 if (insn && frame_reg_rtx == sp_reg_rtx)
29555 if (cfa_restores)
29557 REG_NOTES (insn) = cfa_restores;
29558 cfa_restores = NULL_RTX;
29560 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29561 RTX_FRAME_RELATED_P (insn) = 1;
29564 /* Restore AltiVec registers if we have not done so already. */
29565 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29566 && info->altivec_size != 0
29567 && (DEFAULT_ABI == ABI_V4
29568 || !offset_below_red_zone_p (info->altivec_save_offset)))
29570 int i;
29572 if ((strategy & REST_INLINE_VRS) == 0)
29574 int end_save = info->altivec_save_offset + info->altivec_size;
29575 int ptr_off;
29576 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29577 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29578 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29580 if (end_save + frame_off != 0)
29582 rtx offset = GEN_INT (end_save + frame_off);
29584 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29586 else
29587 emit_move_insn (ptr_reg, frame_reg_rtx);
29589 ptr_off = -end_save;
29590 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29591 info->altivec_save_offset + ptr_off,
29592 0, V4SImode, SAVRES_VR);
29593 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29595 /* Frame reg was clobbered by out-of-line save. Restore it
29596 from ptr_reg, and if we are calling out-of-line gpr or
29597 fpr restore set up the correct pointer and offset. */
29598 unsigned newptr_regno = 1;
29599 if (!restoring_GPRs_inline)
29601 bool lr = info->gp_save_offset + info->gp_size == 0;
29602 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29603 newptr_regno = ptr_regno_for_savres (sel);
29604 end_save = info->gp_save_offset + info->gp_size;
29606 else if (!restoring_FPRs_inline)
29608 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
29609 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29610 newptr_regno = ptr_regno_for_savres (sel);
29611 end_save = info->fp_save_offset + info->fp_size;
29614 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
29615 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
29617 if (end_save + ptr_off != 0)
29619 rtx offset = GEN_INT (end_save + ptr_off);
29621 frame_off = -end_save;
29622 if (TARGET_32BIT)
29623 emit_insn (gen_addsi3_carry (frame_reg_rtx,
29624 ptr_reg, offset));
29625 else
29626 emit_insn (gen_adddi3_carry (frame_reg_rtx,
29627 ptr_reg, offset));
29629 else
29631 frame_off = ptr_off;
29632 emit_move_insn (frame_reg_rtx, ptr_reg);
29636 else
29638 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29639 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29641 rtx addr, areg, mem, insn;
29642 rtx reg = gen_rtx_REG (V4SImode, i);
29643 HOST_WIDE_INT offset
29644 = (info->altivec_save_offset + frame_off
29645 + 16 * (i - info->first_altivec_reg_save));
29647 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29649 mem = gen_frame_mem (V4SImode,
29650 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29651 GEN_INT (offset)));
29652 insn = gen_rtx_SET (reg, mem);
29654 else
29656 areg = gen_rtx_REG (Pmode, 0);
29657 emit_move_insn (areg, GEN_INT (offset));
29659 /* AltiVec addressing mode is [reg+reg]. */
29660 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29661 mem = gen_frame_mem (V4SImode, addr);
29663 /* Rather than emitting a generic move, force use of the
29664 lvx instruction, which we always want. In particular we
29665 don't want lxvd2x/xxpermdi for little endian. */
29666 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29669 (void) emit_insn (insn);
29673 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29674 if (((strategy & REST_INLINE_VRS) == 0
29675 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29676 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29678 rtx reg = gen_rtx_REG (V4SImode, i);
29679 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29683 /* Restore VRSAVE if we have not done so already. */
29684 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29685 && info->vrsave_size != 0
29686 && (DEFAULT_ABI == ABI_V4
29687 || !offset_below_red_zone_p (info->vrsave_save_offset)))
29689 rtx reg;
29691 reg = gen_rtx_REG (SImode, 12);
29692 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29693 info->vrsave_save_offset + frame_off));
29695 emit_insn (generate_set_vrsave (reg, info, 1));
29698 /* If we exit by an out-of-line restore function on ABI_V4 then that
29699 function will deallocate the stack, so we don't need to worry
29700 about the unwinder restoring cr from an invalid stack frame
29701 location. */
29702 exit_func = (!restoring_FPRs_inline
29703 || (!restoring_GPRs_inline
29704 && info->first_fp_reg_save == 64));
29706 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
29707 *separate* slots if the routine calls __builtin_eh_return, so
29708 that they can be independently restored by the unwinder. */
29709 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29711 int i, cr_off = info->ehcr_offset;
29713 for (i = 0; i < 8; i++)
29714 if (!call_used_regs[CR0_REGNO + i])
29716 rtx reg = gen_rtx_REG (SImode, 0);
29717 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29718 cr_off + frame_off));
29720 insn = emit_insn (gen_movsi_to_cr_one
29721 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29723 if (!exit_func && flag_shrink_wrap)
29725 add_reg_note (insn, REG_CFA_RESTORE,
29726 gen_rtx_REG (SImode, CR0_REGNO + i));
29728 RTX_FRAME_RELATED_P (insn) = 1;
29731 cr_off += reg_size;
29735 /* Get the old lr if we saved it. If we are restoring registers
29736 out-of-line, then the out-of-line routines can do this for us. */
29737 if (restore_lr && restoring_GPRs_inline)
29738 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29740 /* Get the old cr if we saved it. */
29741 if (info->cr_save_p)
29743 unsigned cr_save_regno = 12;
29745 if (!restoring_GPRs_inline)
29747 /* Ensure we don't use the register used by the out-of-line
29748 gpr register restore below. */
29749 bool lr = info->gp_save_offset + info->gp_size == 0;
29750 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29751 int gpr_ptr_regno = ptr_regno_for_savres (sel);
29753 if (gpr_ptr_regno == 12)
29754 cr_save_regno = 11;
29755 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
29757 else if (REGNO (frame_reg_rtx) == 12)
29758 cr_save_regno = 11;
29760 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
29761 info->cr_save_offset + frame_off,
29762 exit_func);
29765 /* Set LR here to try to overlap restores below. */
29766 if (restore_lr && restoring_GPRs_inline)
29767 restore_saved_lr (0, exit_func);
29769 /* Load exception handler data registers, if needed. */
29770 if (crtl->calls_eh_return)
29772 unsigned int i, regno;
29774 if (TARGET_AIX)
29776 rtx reg = gen_rtx_REG (reg_mode, 2);
29777 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29778 frame_off + RS6000_TOC_SAVE_SLOT));
29781 for (i = 0; ; ++i)
29783 rtx mem;
29785 regno = EH_RETURN_DATA_REGNO (i);
29786 if (regno == INVALID_REGNUM)
29787 break;
29789 /* Note: possible use of r0 here to address SPE regs. */
29790 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
29791 info->ehrd_offset + frame_off
29792 + reg_size * (int) i);
29794 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
29798 /* Restore GPRs. This is done as a PARALLEL if we are using
29799 the load-multiple instructions. */
29800 if (TARGET_SPE_ABI
29801 && info->spe_64bit_regs_used
29802 && info->first_gp_reg_save != 32)
29804 /* Determine whether we can address all of the registers that need
29805 to be saved with an offset from frame_reg_rtx that fits in
29806 the small const field for SPE memory instructions. */
29807 int spe_regs_addressable
29808 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29809 + reg_size * (32 - info->first_gp_reg_save - 1))
29810 && restoring_GPRs_inline);
29812 if (!spe_regs_addressable)
29814 int ool_adjust = 0;
29815 rtx old_frame_reg_rtx = frame_reg_rtx;
29816 /* Make r11 point to the start of the SPE save area. We worried about
29817 not clobbering it when we were saving registers in the prologue.
29818 There's no need to worry here because the static chain is passed
29819 anew to every function. */
29821 if (!restoring_GPRs_inline)
29822 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29823 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29824 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
29825 GEN_INT (info->spe_gp_save_offset
29826 + frame_off
29827 - ool_adjust)));
29828 /* Keep the invariant that frame_reg_rtx + frame_off points
29829 at the top of the stack frame. */
29830 frame_off = -info->spe_gp_save_offset + ool_adjust;
29833 if (restoring_GPRs_inline)
29835 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
29837 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29838 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29840 rtx offset, addr, mem, reg;
29842 /* We're doing all this to ensure that the immediate offset
29843 fits into the immediate field of 'evldd'. */
29844 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
29846 offset = GEN_INT (spe_offset + reg_size * i);
29847 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
29848 mem = gen_rtx_MEM (V2SImode, addr);
29849 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29851 emit_move_insn (reg, mem);
29854 else
29855 rs6000_emit_savres_rtx (info, frame_reg_rtx,
29856 info->spe_gp_save_offset + frame_off,
29857 info->lr_save_offset + frame_off,
29858 reg_mode,
29859 SAVRES_GPR | SAVRES_LR);
29861 else if (!restoring_GPRs_inline)
29863 /* We are jumping to an out-of-line function. */
29864 rtx ptr_reg;
29865 int end_save = info->gp_save_offset + info->gp_size;
29866 bool can_use_exit = end_save == 0;
29867 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
29868 int ptr_off;
29870 /* Emit stack reset code if we need it. */
29871 ptr_regno = ptr_regno_for_savres (sel);
29872 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29873 if (can_use_exit)
29874 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29875 else if (end_save + frame_off != 0)
29876 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
29877 GEN_INT (end_save + frame_off)));
29878 else if (REGNO (frame_reg_rtx) != ptr_regno)
29879 emit_move_insn (ptr_reg, frame_reg_rtx);
29880 if (REGNO (frame_reg_rtx) == ptr_regno)
29881 frame_off = -end_save;
29883 if (can_use_exit && info->cr_save_p)
29884 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
29886 ptr_off = -end_save;
29887 rs6000_emit_savres_rtx (info, ptr_reg,
29888 info->gp_save_offset + ptr_off,
29889 info->lr_save_offset + ptr_off,
29890 reg_mode, sel);
29892 else if (using_load_multiple)
29894 rtvec p;
29895 p = rtvec_alloc (32 - info->first_gp_reg_save);
29896 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29897 RTVEC_ELT (p, i)
29898 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29899 frame_reg_rtx,
29900 info->gp_save_offset + frame_off + reg_size * i);
29901 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29903 else
29905 int offset = info->gp_save_offset + frame_off;
29906 for (i = info->first_gp_reg_save; i < 32; i++)
29908 if (rs6000_reg_live_or_pic_offset_p (i)
29909 && !cfun->machine->gpr_is_wrapped_separately[i])
29911 rtx reg = gen_rtx_REG (reg_mode, i);
29912 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
29915 offset += reg_size;
29919 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29921 /* If the frame pointer was used then we can't delay emitting
29922 a REG_CFA_DEF_CFA note. This must happen on the insn that
29923 restores the frame pointer, r31. We may have already emitted
29924 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
29925 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
29926 be harmless if emitted. */
29927 if (frame_pointer_needed)
29929 insn = get_last_insn ();
29930 add_reg_note (insn, REG_CFA_DEF_CFA,
29931 plus_constant (Pmode, frame_reg_rtx, frame_off));
29932 RTX_FRAME_RELATED_P (insn) = 1;
29935 /* Set up cfa_restores. We always need these when
29936 shrink-wrapping. If not shrink-wrapping then we only need
29937 the cfa_restore when the stack location is no longer valid.
29938 The cfa_restores must be emitted on or before the insn that
29939 invalidates the stack, and of course must not be emitted
29940 before the insn that actually does the restore. The latter
29941 is why it is a bad idea to emit the cfa_restores as a group
29942 on the last instruction here that actually does a restore:
29943 that insn may be reordered with respect to others doing
29944 restores. */
29945 if (flag_shrink_wrap
29946 && !restoring_GPRs_inline
29947 && info->first_fp_reg_save == 64)
29948 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29950 for (i = info->first_gp_reg_save; i < 32; i++)
29951 if (!restoring_GPRs_inline
29952 || using_load_multiple
29953 || rs6000_reg_live_or_pic_offset_p (i))
29955 if (cfun->machine->gpr_is_wrapped_separately[i])
29956 continue;
29958 rtx reg = gen_rtx_REG (reg_mode, i);
29959 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29963 if (!restoring_GPRs_inline
29964 && info->first_fp_reg_save == 64)
29966 /* We are jumping to an out-of-line function. */
29967 if (cfa_restores)
29968 emit_cfa_restores (cfa_restores);
29969 return;
29972 if (restore_lr && !restoring_GPRs_inline)
29974 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29975 restore_saved_lr (0, exit_func);
29978 /* Restore fpr's if we need to do it without calling a function. */
29979 if (restoring_FPRs_inline)
29980 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29981 if (save_reg_p (info->first_fp_reg_save + i))
29983 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29984 ? DFmode : SFmode),
29985 info->first_fp_reg_save + i);
29986 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29987 info->fp_save_offset + frame_off + 8 * i));
29988 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29989 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29992 /* If we saved cr, restore it here. Just those that were used. */
29993 if (info->cr_save_p)
29994 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
29996 /* If this is V.4, unwind the stack pointer after all of the loads
29997 have been done, or set up r11 if we are restoring fp out of line. */
29998 ptr_regno = 1;
29999 if (!restoring_FPRs_inline)
30001 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30002 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30003 ptr_regno = ptr_regno_for_savres (sel);
30006 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
30007 if (REGNO (frame_reg_rtx) == ptr_regno)
30008 frame_off = 0;
30010 if (insn && restoring_FPRs_inline)
30012 if (cfa_restores)
30014 REG_NOTES (insn) = cfa_restores;
30015 cfa_restores = NULL_RTX;
30017 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
30018 RTX_FRAME_RELATED_P (insn) = 1;
30021 if (crtl->calls_eh_return)
30023 rtx sa = EH_RETURN_STACKADJ_RTX;
30024 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
30027 if (!sibcall && restoring_FPRs_inline)
30029 if (cfa_restores)
30031 /* We can't hang the cfa_restores off a simple return,
30032 since the shrink-wrap code sometimes uses an existing
30033 return. This means there might be a path from
30034 pre-prologue code to this return, and dwarf2cfi code
30035 wants the eh_frame unwinder state to be the same on
30036 all paths to any point. So we need to emit the
30037 cfa_restores before the return. For -m64 we really
30038 don't need epilogue cfa_restores at all, except for
30039 this irritating dwarf2cfi-with-shrink-wrap
30040 requirement; the stack red-zone means eh_frame info
30041 from the prologue telling the unwinder to restore
30042 from the stack is perfectly good right to the end of
30043 the function. */
30044 emit_insn (gen_blockage ());
30045 emit_cfa_restores (cfa_restores);
30046 cfa_restores = NULL_RTX;
30049 emit_jump_insn (targetm.gen_simple_return ());
30052 if (!sibcall && !restoring_FPRs_inline)
30054 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30055 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
30056 int elt = 0;
30057 RTVEC_ELT (p, elt++) = ret_rtx;
30058 if (lr)
30059 RTVEC_ELT (p, elt++)
30060 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
30062 /* We have to restore more than two FP registers, so branch to the
30063 restore function. It will return to our caller. */
30064 int i;
30065 int reg;
30066 rtx sym;
30068 if (flag_shrink_wrap)
30069 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
30071 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
30072 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
30073 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
30074 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
30076 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
30078 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
30080 RTVEC_ELT (p, elt++)
30081 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
30082 if (flag_shrink_wrap)
30083 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30086 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
30089 if (cfa_restores)
30091 if (sibcall)
30092 /* Ensure the cfa_restores are hung off an insn that won't
30093 be reordered above other restores. */
30094 emit_insn (gen_blockage ());
30096 emit_cfa_restores (cfa_restores);
30100 /* Write function epilogue. */
30102 static void
30103 rs6000_output_function_epilogue (FILE *file,
30104 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
30106 #if TARGET_MACHO
30107 macho_branch_islands ();
30108 /* Mach-O doesn't support labels at the end of objects, so if
30109 it looks like we might want one, insert a NOP. */
30111 rtx_insn *insn = get_last_insn ();
30112 rtx_insn *deleted_debug_label = NULL;
30113 while (insn
30114 && NOTE_P (insn)
30115 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
30117 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
30118 notes only, instead set their CODE_LABEL_NUMBER to -1,
30119 otherwise there would be code generation differences
30120 in between -g and -g0. */
30121 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30122 deleted_debug_label = insn;
30123 insn = PREV_INSN (insn);
30125 if (insn
30126 && (LABEL_P (insn)
30127 || (NOTE_P (insn)
30128 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
30129 fputs ("\tnop\n", file);
30130 else if (deleted_debug_label)
30131 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
30132 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
30133 CODE_LABEL_NUMBER (insn) = -1;
30135 #endif
30137 /* Output a traceback table here. See /usr/include/sys/debug.h for info
30138 on its format.
30140 We don't output a traceback table if -finhibit-size-directive was
30141 used. The documentation for -finhibit-size-directive reads
30142 ``don't output a @code{.size} assembler directive, or anything
30143 else that would cause trouble if the function is split in the
30144 middle, and the two halves are placed at locations far apart in
30145 memory.'' The traceback table has this property, since it
30146 includes the offset from the start of the function to the
30147 traceback table itself.
30149 System V.4 PowerPC targets (and the embedded ABI derived from them) use a
30150 different traceback table. */
30151 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30152 && ! flag_inhibit_size_directive
30153 && rs6000_traceback != traceback_none && !cfun->is_thunk)
30155 const char *fname = NULL;
30156 const char *language_string = lang_hooks.name;
30157 int fixed_parms = 0, float_parms = 0, parm_info = 0;
30158 int i;
30159 int optional_tbtab;
30160 rs6000_stack_t *info = rs6000_stack_info ();
30162 if (rs6000_traceback == traceback_full)
30163 optional_tbtab = 1;
30164 else if (rs6000_traceback == traceback_part)
30165 optional_tbtab = 0;
30166 else
30167 optional_tbtab = !optimize_size && !TARGET_ELF;
30169 if (optional_tbtab)
30171 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30172 while (*fname == '.') /* V.4 encodes . in the name */
30173 fname++;
30175 /* Need label immediately before tbtab, so we can compute
30176 its offset from the function start. */
30177 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30178 ASM_OUTPUT_LABEL (file, fname);
30181 /* The .tbtab pseudo-op can only be used for the first eight
30182 expressions, since it can't handle the possibly variable
30183 length fields that follow. However, if you omit the optional
30184 fields, the assembler outputs zeros for all optional fields
30185 anyway, giving each variable-length field its minimum length
30186 (as defined in sys/debug.h). Thus we cannot use the .tbtab
30187 pseudo-op at all. */
30189 /* An all-zero word flags the start of the tbtab, for debuggers
30190 that have to find it by searching forward from the entry
30191 point or from the current pc. */
30192 fputs ("\t.long 0\n", file);
30194 /* Tbtab format type. Use format type 0. */
30195 fputs ("\t.byte 0,", file);
30197 /* Language type. Unfortunately, there does not seem to be any
30198 official way to discover the language being compiled, so we
30199 use language_string.
30200 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
30201 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
30202 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
30203 either, so for now use 0. */
30204 if (lang_GNU_C ()
30205 || ! strcmp (language_string, "GNU GIMPLE")
30206 || ! strcmp (language_string, "GNU Go")
30207 || ! strcmp (language_string, "libgccjit"))
30208 i = 0;
30209 else if (! strcmp (language_string, "GNU F77")
30210 || lang_GNU_Fortran ())
30211 i = 1;
30212 else if (! strcmp (language_string, "GNU Pascal"))
30213 i = 2;
30214 else if (! strcmp (language_string, "GNU Ada"))
30215 i = 3;
30216 else if (lang_GNU_CXX ()
30217 || ! strcmp (language_string, "GNU Objective-C++"))
30218 i = 9;
30219 else if (! strcmp (language_string, "GNU Java"))
30220 i = 13;
30221 else if (! strcmp (language_string, "GNU Objective-C"))
30222 i = 14;
30223 else
30224 gcc_unreachable ();
30225 fprintf (file, "%d,", i);
30227 /* 8 single bit fields: global linkage (not set for C extern linkage,
30228 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
30229 from start of procedure stored in tbtab, internal function, function
30230 has controlled storage, function has no toc, function uses fp,
30231 function logs/aborts fp operations. */
30232 /* Assume that fp operations are used if any fp reg must be saved. */
30233 fprintf (file, "%d,",
30234 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
30236 /* 6 bitfields: function is interrupt handler, name present in
30237 proc table, function calls alloca, on condition directives
30238 (controls stack walks, 3 bits), saves condition reg, saves
30239 link reg. */
30240 /* The `function calls alloca' bit seems to be set whenever reg 31 is
30241 set up as a frame pointer, even when there is no alloca call. */
30242 fprintf (file, "%d,",
30243 ((optional_tbtab << 6)
30244 | ((optional_tbtab & frame_pointer_needed) << 5)
30245 | (info->cr_save_p << 1)
30246 | (info->lr_save_p)));
30248 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
30249 (6 bits). */
30250 fprintf (file, "%d,",
30251 (info->push_p << 7) | (64 - info->first_fp_reg_save));
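/* For example (illustrative): a function that pushes a frame and
   saves f28-f31 emits 0x80 | 4 == 132 here.  */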
30253 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
30254 fprintf (file, "%d,", (32 - first_reg_to_save ()));
30256 if (optional_tbtab)
30258 /* Compute the parameter info from the function decl argument
30259 list. */
30260 tree decl;
30261 int next_parm_info_bit = 31;
30263 for (decl = DECL_ARGUMENTS (current_function_decl);
30264 decl; decl = DECL_CHAIN (decl))
30266 rtx parameter = DECL_INCOMING_RTL (decl);
30267 machine_mode mode = GET_MODE (parameter);
30269 if (GET_CODE (parameter) == REG)
30271 if (SCALAR_FLOAT_MODE_P (mode))
30273 int bits;
30275 float_parms++;
30277 switch (mode)
30279 case SFmode:
30280 case SDmode:
30281 bits = 0x2;
30282 break;
30284 case DFmode:
30285 case DDmode:
30286 case TFmode:
30287 case TDmode:
30288 case IFmode:
30289 case KFmode:
30290 bits = 0x3;
30291 break;
30293 default:
30294 gcc_unreachable ();
30297 /* If only one bit will fit, don't or in this entry. */
30298 if (next_parm_info_bit > 0)
30299 parm_info |= (bits << (next_parm_info_bit - 1));
30300 next_parm_info_bit -= 2;
30302 else
30304 fixed_parms += ((GET_MODE_SIZE (mode)
30305 + (UNITS_PER_WORD - 1))
30306 / UNITS_PER_WORD);
30307 next_parm_info_bit -= 1;
30313 /* Number of fixed point parameters. */
30314 /* This is actually the number of words of fixed point parameters; thus
30315 an 8-byte struct counts as 2, and the maximum value is 8. */
30316 fprintf (file, "%d,", fixed_parms);
30318 /* 2 bitfields: number of floating point parameters (7 bits), parameters
30319 all on stack. */
30320 /* This is actually the number of fp registers that hold parameters;
30321 and thus the maximum value is 13. */
30322 /* Set parameters on stack bit if parameters are not in their original
30323 registers, regardless of whether they are on the stack? Xlc
30324 seems to set the bit when not optimizing. */
30325 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
30327 if (! optional_tbtab)
30328 return;
30330 /* Optional fields follow. Some are variable length. */
30332 /* Parameter types, left-adjusted bit fields: 0 fixed, 10 single float,
30333 11 double float. */
30334 /* There is an entry for each parameter in a register, in the order that
30335 they occur in the parameter list. Any intervening arguments on the
30336 stack are ignored. If the list overflows a long (max possible length
30337 34 bits) then completely leave off all elements that don't fit. */
30338 /* Only emit this long if there was at least one parameter. */
30339 if (fixed_parms || float_parms)
30340 fprintf (file, "\t.long %d\n", parm_info);
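/* Worked example (illustrative): for a function taking (int, double),
   the int contributes a 0 bit at bit 31 and the double contributes
   "11" at bits 30-29, so parm_info comes out as 0x60000000.  */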
30342 /* Offset from start of code to tb table. */
30343 fputs ("\t.long ", file);
30344 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
30345 RS6000_OUTPUT_BASENAME (file, fname);
30346 putc ('-', file);
30347 rs6000_output_function_entry (file, fname);
30348 putc ('\n', file);
30350 /* Interrupt handler mask. */
30351 /* Omit this long, since we never set the interrupt handler bit
30352 above. */
30354 /* Number of CTL (controlled storage) anchors. */
30355 /* Omit this long, since the has_ctl bit is never set above. */
30357 /* Displacement into stack of each CTL anchor. */
30358 /* Omit this list of longs, because there are no CTL anchors. */
30360 /* Length of function name. */
30361 if (*fname == '*')
30362 ++fname;
30363 fprintf (file, "\t.short %d\n", (int) strlen (fname));
30365 /* Function name. */
30366 assemble_string (fname, strlen (fname));
30368 /* Register for alloca automatic storage; this is always reg 31.
30369 Only emit this if the alloca bit was set above. */
30370 if (frame_pointer_needed)
30371 fputs ("\t.byte 31\n", file);
30373 fputs ("\t.align 2\n", file);
30376 /* Arrange to define .LCTOC1 label, if not already done. */
30377 if (need_toc_init)
30379 need_toc_init = 0;
30380 if (!toc_initialized)
30382 switch_to_section (toc_section);
30383 switch_to_section (current_function_section ());
30388 /* -fsplit-stack support. */
30390 /* A SYMBOL_REF for __morestack. */
30391 static GTY(()) rtx morestack_ref;
30393 static rtx
30394 gen_add3_const (rtx rt, rtx ra, long c)
30396 if (TARGET_64BIT)
30397 return gen_adddi3 (rt, ra, GEN_INT (c));
30398 else
30399 return gen_addsi3 (rt, ra, GEN_INT (c));
30402 /* Emit -fsplit-stack prologue, which goes before the regular function
30403 prologue (at local entry point in the case of ELFv2). */
30405 void
30406 rs6000_expand_split_stack_prologue (void)
30408 rs6000_stack_t *info = rs6000_stack_info ();
30409 unsigned HOST_WIDE_INT allocate;
30410 long alloc_hi, alloc_lo;
30411 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
30412 rtx_insn *insn;
30414 gcc_assert (flag_split_stack && reload_completed);
30416 if (!info->push_p)
30417 return;
30419 if (global_regs[29])
30421 error ("-fsplit-stack uses register r29");
30422 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
30423 "conflicts with %qD", global_regs_decl[29]);
30426 allocate = info->total_size;
30427 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
30429 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
30430 return;
30432 if (morestack_ref == NULL_RTX)
30434 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
30435 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
30436 | SYMBOL_FLAG_FUNCTION);
30439 r0 = gen_rtx_REG (Pmode, 0);
30440 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30441 r12 = gen_rtx_REG (Pmode, 12);
30442 emit_insn (gen_load_split_stack_limit (r0));
30443 /* Always emit two insns here to calculate the requested stack,
30444 so that the linker can edit them when adjusting size for calling
30445 non-split-stack code. */
30446 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
30447 alloc_lo = -allocate - alloc_hi;
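/* Worked example (illustrative): with allocate == 0x12345,
   -allocate == -0x12345, so alloc_hi == -0x10000 and
   alloc_lo == -0x2345. By construction alloc_lo always lies in
   [-0x8000, 0x7fff], fitting the addi immediate, and alloc_hi is a
   multiple of 0x10000 suitable for addis.  */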
30448 if (alloc_hi != 0)
30450 emit_insn (gen_add3_const (r12, r1, alloc_hi));
30451 if (alloc_lo != 0)
30452 emit_insn (gen_add3_const (r12, r12, alloc_lo));
30453 else
30454 emit_insn (gen_nop ());
30456 else
30458 emit_insn (gen_add3_const (r12, r1, alloc_lo));
30459 emit_insn (gen_nop ());
30462 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30463 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
30464 ok_label = gen_label_rtx ();
30465 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30466 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
30467 gen_rtx_LABEL_REF (VOIDmode, ok_label),
30468 pc_rtx);
30469 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30470 JUMP_LABEL (jump) = ok_label;
30471 /* Mark the jump as very likely to be taken. */
30472 add_int_reg_note (jump, REG_BR_PROB,
30473 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
30475 lr = gen_rtx_REG (Pmode, LR_REGNO);
30476 insn = emit_move_insn (r0, lr);
30477 RTX_FRAME_RELATED_P (insn) = 1;
30478 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
30479 RTX_FRAME_RELATED_P (insn) = 1;
30481 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
30482 const0_rtx, const0_rtx));
30483 call_fusage = NULL_RTX;
30484 use_reg (&call_fusage, r12);
30485 /* Say the call uses r0, even though it doesn't, to stop regrename
30486 from twiddling with the insns saving lr, trashing args for cfun.
30487 The insns restoring lr are similarly protected by making
30488 split_stack_return use r0. */
30489 use_reg (&call_fusage, r0);
30490 add_function_usage_to (insn, call_fusage);
30491 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
30492 insn = emit_move_insn (lr, r0);
30493 add_reg_note (insn, REG_CFA_RESTORE, lr);
30494 RTX_FRAME_RELATED_P (insn) = 1;
30495 emit_insn (gen_split_stack_return ());
30497 emit_label (ok_label);
30498 LABEL_NUSES (ok_label) = 1;
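/* A sketch of the sequence emitted above (offsets, the label name,
   and the limit load are illustrative and target-dependent):

	<load stack limit into r0>
	addis 12,1,hi(-frame_size)
	addi 12,12,lo(-frame_size)
	cmpld 7,12,0
	bge 7,.Lok
	mflr 0
	std 0,lr_save_offset(1)
	bl __morestack
	ld 0,lr_save_offset(1)
	mtlr 0
	blr
   .Lok:
	<normal prologue follows>  */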
30501 /* Return the internal arg pointer used for function incoming
30502 arguments. When -fsplit-stack, the arg pointer is r12 so we need
30503 to copy it to a pseudo in order for it to be preserved over calls
30504 and suchlike. We'd really like to use a pseudo here for the
30505 internal arg pointer but data-flow analysis is not prepared to
30506 accept pseudos as live at the beginning of a function. */
30508 static rtx
30509 rs6000_internal_arg_pointer (void)
30511 if (flag_split_stack
30512 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
30513 == NULL))
30516 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
30518 rtx pat;
30520 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
30521 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
30523 /* Put the pseudo initialization right after the note at the
30524 beginning of the function. */
30525 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
30526 gen_rtx_REG (Pmode, 12));
30527 push_topmost_sequence ();
30528 emit_insn_after (pat, get_insns ());
30529 pop_topmost_sequence ();
30531 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
30532 FIRST_PARM_OFFSET (current_function_decl));
30534 return virtual_incoming_args_rtx;
30537 /* We may have to tell the dataflow pass that the split stack prologue
30538 is initializing a register. */
30540 static void
30541 rs6000_live_on_entry (bitmap regs)
30543 if (flag_split_stack)
30544 bitmap_set_bit (regs, 12);
30547 /* Emit -fsplit-stack dynamic stack allocation space check. */
30549 void
30550 rs6000_split_stack_space_check (rtx size, rtx label)
30552 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30553 rtx limit = gen_reg_rtx (Pmode);
30554 rtx requested = gen_reg_rtx (Pmode);
30555 rtx cmp = gen_reg_rtx (CCUNSmode);
30556 rtx jump;
30558 emit_insn (gen_load_split_stack_limit (limit));
30559 if (CONST_INT_P (size))
30560 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
30561 else
30563 size = force_reg (Pmode, size);
30564 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
30566 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
30567 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30568 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
30569 gen_rtx_LABEL_REF (VOIDmode, label),
30570 pc_rtx);
30571 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30572 JUMP_LABEL (jump) = label;
30575 /* A C compound statement that outputs the assembler code for a thunk
30576 function, used to implement C++ virtual function calls with
30577 multiple inheritance. The thunk acts as a wrapper around a virtual
30578 function, adjusting the implicit object parameter before handing
30579 control off to the real function.
30581 First, emit code to add the integer DELTA to the location that
30582 contains the incoming first argument. Assume that this argument
30583 contains a pointer, and is the one used to pass the `this' pointer
30584 in C++. This is the incoming argument *before* the function
30585 prologue, e.g. `%o0' on a sparc. The addition must preserve the
30586 values of all other incoming arguments.
30588 After the addition, emit code to jump to FUNCTION, which is a
30589 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
30590 not touch the return address. Hence returning from FUNCTION will
30591 return to whoever called the current `thunk'.
30593 The effect must be as if FUNCTION had been called directly with the
30594 adjusted first argument. This macro is responsible for emitting
30595 all of the code for a thunk function; output_function_prologue()
30596 and output_function_epilogue() are not invoked.
30598 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
30599 been extracted from it.) It might possibly be useful on some
30600 targets, but probably not.
30602 If you do not define this macro, the target-independent code in the
30603 C++ frontend will generate a less efficient heavyweight thunk that
30604 calls FUNCTION instead of jumping to it. The generic approach does
30605 not support varargs. */
30607 static void
30608 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
30609 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
30610 tree function)
30612 rtx this_rtx, funexp;
30613 rtx_insn *insn;
30615 reload_completed = 1;
30616 epilogue_completed = 1;
30618 /* Mark the end of the (empty) prologue. */
30619 emit_note (NOTE_INSN_PROLOGUE_END);
30621 /* Find the "this" pointer. If the function returns a structure,
30622 the structure return pointer is in r3. */
30623 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
30624 this_rtx = gen_rtx_REG (Pmode, 4);
30625 else
30626 this_rtx = gen_rtx_REG (Pmode, 3);
30628 /* Apply the constant offset, if required. */
30629 if (delta)
30630 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
30632 /* Apply the offset from the vtable, if required. */
30633 if (vcall_offset)
30635 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
30636 rtx tmp = gen_rtx_REG (Pmode, 12);
30638 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
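/* If the vcall offset does not fit in a signed 16-bit displacement,
   add it to the vtable pointer and load with a zero displacement;
   otherwise fold the offset into the load's displacement.  */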
30639 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
30641 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
30642 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
30644 else
30646 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
30648 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
30650 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
30653 /* Generate a tail call to the target function. */
30654 if (!TREE_USED (function))
30656 assemble_external (function);
30657 TREE_USED (function) = 1;
30659 funexp = XEXP (DECL_RTL (function), 0);
30660 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
30662 #if TARGET_MACHO
30663 if (MACHOPIC_INDIRECT)
30664 funexp = machopic_indirect_call_target (funexp);
30665 #endif
30667 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
30668 generate sibcall RTL explicitly. */
30669 insn = emit_call_insn (
30670 gen_rtx_PARALLEL (VOIDmode,
30671 gen_rtvec (3,
30672 gen_rtx_CALL (VOIDmode,
30673 funexp, const0_rtx),
30674 gen_rtx_USE (VOIDmode, const0_rtx),
30675 simple_return_rtx)));
30676 SIBLING_CALL_P (insn) = 1;
30677 emit_barrier ();
30679 /* Run just enough of rest_of_compilation to get the insns emitted.
30680 There's not really enough bulk here to make other passes such as
30681 instruction scheduling worthwhile. Note that use_thunk calls
30682 assemble_start_function and assemble_end_function. */
30683 insn = get_insns ();
30684 shorten_branches (insn);
30685 final_start_function (insn, file, 1);
30686 final (insn, file, 1);
30687 final_end_function ();
30689 reload_completed = 0;
30690 epilogue_completed = 0;
30693 /* A quick summary of the various types of 'constant-pool tables'
30694 under PowerPC:
30696 Target Flags Name One table per
30697 AIX (none) AIX TOC object file
30698 AIX -mfull-toc AIX TOC object file
30699 AIX -mminimal-toc AIX minimal TOC translation unit
30700 SVR4/EABI (none) SVR4 SDATA object file
30701 SVR4/EABI -fpic SVR4 pic object file
30702 SVR4/EABI -fPIC SVR4 PIC translation unit
30703 SVR4/EABI -mrelocatable EABI TOC function
30704 SVR4/EABI -maix AIX TOC object file
30705 SVR4/EABI -maix -mminimal-toc
30706 AIX minimal TOC translation unit
30708 Name Reg. Set by entries contains:
30709 made by addrs? fp? sum?
30711 AIX TOC 2 crt0 as Y option option
30712 AIX minimal TOC 30 prolog gcc Y Y option
30713 SVR4 SDATA 13 crt0 gcc N Y N
30714 SVR4 pic 30 prolog ld Y not yet N
30715 SVR4 PIC 30 prolog gcc Y option option
30716 EABI TOC 30 prolog gcc Y option option
30720 /* Hash functions for the hash table. */
30722 static unsigned
30723 rs6000_hash_constant (rtx k)
30725 enum rtx_code code = GET_CODE (k);
30726 machine_mode mode = GET_MODE (k);
30727 unsigned result = (code << 3) ^ mode;
30728 const char *format;
30729 int flen, fidx;
30731 format = GET_RTX_FORMAT (code);
30732 flen = strlen (format);
30733 fidx = 0;
30735 switch (code)
30737 case LABEL_REF:
30738 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
30740 case CONST_WIDE_INT:
30742 int i;
30743 flen = CONST_WIDE_INT_NUNITS (k);
30744 for (i = 0; i < flen; i++)
30745 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
30746 return result;
30749 case CONST_DOUBLE:
30750 if (mode != VOIDmode)
30751 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
30752 flen = 2;
30753 break;
30755 case CODE_LABEL:
30756 fidx = 3;
30757 break;
30759 default:
30760 break;
30763 for (; fidx < flen; fidx++)
30764 switch (format[fidx])
30766 case 's':
30768 unsigned i, len;
30769 const char *str = XSTR (k, fidx);
30770 len = strlen (str);
30771 result = result * 613 + len;
30772 for (i = 0; i < len; i++)
30773 result = result * 613 + (unsigned) str[i];
30774 break;
30776 case 'u':
30777 case 'e':
30778 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
30779 break;
30780 case 'i':
30781 case 'n':
30782 result = result * 613 + (unsigned) XINT (k, fidx);
30783 break;
30784 case 'w':
30785 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
30786 result = result * 613 + (unsigned) XWINT (k, fidx);
30787 else
30789 size_t i;
30790 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
30791 result = result * 613 + (unsigned) (XWINT (k, fidx)
30792 >> CHAR_BIT * i);
30794 break;
30795 case '0':
30796 break;
30797 default:
30798 gcc_unreachable ();
30801 return result;
30804 hashval_t
30805 toc_hasher::hash (toc_hash_struct *thc)
30807 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
30810 /* Compare H1 and H2 for equivalence. */
30812 bool
30813 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
30815 rtx r1 = h1->key;
30816 rtx r2 = h2->key;
30818 if (h1->key_mode != h2->key_mode)
30819 return 0;
30821 return rtx_equal_p (r1, r2);
30824 /* These are the names given by the C++ front end to vtables and
30825 vtable-like objects. Ideally, this logic should not be here;
30826 instead, there should be some programmatic way of inquiring as
30827 to whether or not an object is a vtable. */
30829 #define VTABLE_NAME_P(NAME) \
30830 (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0 \
30831 || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0 \
30832 || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0 \
30833 || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0 \
30834 || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0)
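/* Under the Itanium C++ ABI mangling used by GCC, "_ZTV" prefixes
   vtables, "_ZTT" VTTs, "_ZTI" typeinfo objects and "_ZTC"
   construction vtables; "_vt." is the old GNU v2 vtable prefix.  */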
30836 #ifdef NO_DOLLAR_IN_LABEL
30837 /* Return a GGC-allocated character string translating dollar signs in
30838 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
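/* For example, "foo$bar$baz" becomes "foo_bar_baz".  A NAME with no
   '$', or one whose first character is '$', is returned unchanged.  */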
30840 const char *
30841 rs6000_xcoff_strip_dollar (const char *name)
30843 char *strip, *p;
30844 const char *q;
30845 size_t len;
30847 q = (const char *) strchr (name, '$');
30849 if (q == 0 || q == name)
30850 return name;
30852 len = strlen (name);
30853 strip = XALLOCAVEC (char, len + 1);
30854 strcpy (strip, name);
30855 p = strip + (q - name);
30856 while (p)
30858 *p = '_';
30859 p = strchr (p + 1, '$');
30862 return ggc_alloc_string (strip, len);
30864 #endif
30866 void
30867 rs6000_output_symbol_ref (FILE *file, rtx x)
30869 const char *name = XSTR (x, 0);
30871 /* Currently C++ toc references to vtables can be emitted before it
30872 is decided whether the vtable is public or private. If this is
30873 the case, then the linker will eventually complain that there is
30874 a reference to an unknown section. Thus, for vtables only,
30875 we emit the TOC reference to reference the identifier and not the
30876 symbol. */
30877 if (VTABLE_NAME_P (name))
30879 RS6000_OUTPUT_BASENAME (file, name);
30881 else
30882 assemble_name (file, name);
30885 /* Output a TOC entry. We derive the entry name from what is being
30886 written. */
30888 void
30889 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
30891 char buf[256];
30892 const char *name = buf;
30893 rtx base = x;
30894 HOST_WIDE_INT offset = 0;
30896 gcc_assert (!TARGET_NO_TOC);
30898 /* When the linker won't eliminate them, don't output duplicate
30899 TOC entries (this happens on AIX if there is any kind of TOC,
30900 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
30901 CODE_LABELs. */
30902 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
30904 struct toc_hash_struct *h;
30906 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
30907 time because GGC is not initialized at that point. */
30908 if (toc_hash_table == NULL)
30909 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
30911 h = ggc_alloc<toc_hash_struct> ();
30912 h->key = x;
30913 h->key_mode = mode;
30914 h->labelno = labelno;
30916 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
30917 if (*found == NULL)
30918 *found = h;
30919 else /* This is indeed a duplicate.
30920 Set this label equal to that label. */
30922 fputs ("\t.set ", file);
30923 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30924 fprintf (file, "%d,", labelno);
30925 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30926 fprintf (file, "%d\n", ((*found)->labelno));
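/* This emits an assembler alias such as ".set LC..42,LC..7" (XCOFF
   spelling), so the duplicate label resolves to the TOC entry that
   was emitted first.  */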
30928 #ifdef HAVE_AS_TLS
30929 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
30930 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
30931 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
30933 fputs ("\t.set ", file);
30934 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30935 fprintf (file, "%d,", labelno);
30936 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30937 fprintf (file, "%d\n", ((*found)->labelno));
30939 #endif
30940 return;
30944 /* If we're going to put a double constant in the TOC, make sure it's
30945 aligned properly when strict alignment is on. */
30946 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
30947 && STRICT_ALIGNMENT
30948 && GET_MODE_BITSIZE (mode) >= 64
30949 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
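/* The second argument of ASM_OUTPUT_ALIGN is a log2 byte count, so 3
   requests 8-byte alignment.  */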
30950 ASM_OUTPUT_ALIGN (file, 3);
30953 (*targetm.asm_out.internal_label) (file, "LC", labelno);
30955 /* Handle FP constants specially. Note that if we have a minimal
30956 TOC, things we put here aren't actually in the TOC, so we can allow
30957 FP constants. */
30958 if (GET_CODE (x) == CONST_DOUBLE
30959 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
30960 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
30962 long k[4];
30964 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30965 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
30966 else
30967 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30969 if (TARGET_64BIT)
30971 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30972 fputs (DOUBLE_INT_ASM_OP, file);
30973 else
30974 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30975 k[0] & 0xffffffff, k[1] & 0xffffffff,
30976 k[2] & 0xffffffff, k[3] & 0xffffffff);
30977 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
30978 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30979 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
30980 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
30981 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
30982 return;
30984 else
30986 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30987 fputs ("\t.long ", file);
30988 else
30989 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30990 k[0] & 0xffffffff, k[1] & 0xffffffff,
30991 k[2] & 0xffffffff, k[3] & 0xffffffff);
30992 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
30993 k[0] & 0xffffffff, k[1] & 0xffffffff,
30994 k[2] & 0xffffffff, k[3] & 0xffffffff);
30995 return;
30998 else if (GET_CODE (x) == CONST_DOUBLE
30999 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
31001 long k[2];
31003 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31004 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
31005 else
31006 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
31008 if (TARGET_64BIT)
31010 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31011 fputs (DOUBLE_INT_ASM_OP, file);
31012 else
31013 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31014 k[0] & 0xffffffff, k[1] & 0xffffffff);
31015 fprintf (file, "0x%lx%08lx\n",
31016 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
31017 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
31018 return;
31020 else
31022 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31023 fputs ("\t.long ", file);
31024 else
31025 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
31026 k[0] & 0xffffffff, k[1] & 0xffffffff);
31027 fprintf (file, "0x%lx,0x%lx\n",
31028 k[0] & 0xffffffff, k[1] & 0xffffffff);
31029 return;
31032 else if (GET_CODE (x) == CONST_DOUBLE
31033 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
31035 long l;
31037 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
31038 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
31039 else
31040 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
31042 if (TARGET_64BIT)
31044 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31045 fputs (DOUBLE_INT_ASM_OP, file);
31046 else
31047 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31048 if (WORDS_BIG_ENDIAN)
31049 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
31050 else
31051 fprintf (file, "0x%lx\n", l & 0xffffffff);
31052 return;
31054 else
31056 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31057 fputs ("\t.long ", file);
31058 else
31059 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
31060 fprintf (file, "0x%lx\n", l & 0xffffffff);
31061 return;
31064 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
31066 unsigned HOST_WIDE_INT low;
31067 HOST_WIDE_INT high;
31069 low = INTVAL (x) & 0xffffffff;
31070 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
31072 /* TOC entries are always Pmode-sized, so when big-endian
31073 smaller integer constants in the TOC need to be padded.
31074 (This is still a win over putting the constants in
31075 a separate constant pool, because then we'd have
31076 to have both a TOC entry _and_ the actual constant.)
31078 For a 32-bit target, CONST_INT values are loaded and shifted
31079 entirely within `low' and can be stored in one TOC entry. */
31081 /* Making this work for modes wider than POINTER_SIZE on 64-bit targets would be easy, but it isn't supported now. */
31082 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
31084 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
31086 low |= high << 32;
31087 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
31088 high = (HOST_WIDE_INT) low >> 32;
31089 low &= 0xffffffff;
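/* For example, an SImode 0x12345678 on a 64-bit big-endian target is
   left-justified to high = 0x12345678, low = 0, so the value occupies
   the most significant word of the doubleword TOC entry.  */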
31092 if (TARGET_64BIT)
31094 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31095 fputs (DOUBLE_INT_ASM_OP, file);
31096 else
31097 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31098 (long) high & 0xffffffff, (long) low & 0xffffffff);
31099 fprintf (file, "0x%lx%08lx\n",
31100 (long) high & 0xffffffff, (long) low & 0xffffffff);
31101 return;
31103 else
31105 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
31107 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31108 fputs ("\t.long ", file);
31109 else
31110 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
31111 (long) high & 0xffffffff, (long) low & 0xffffffff);
31112 fprintf (file, "0x%lx,0x%lx\n",
31113 (long) high & 0xffffffff, (long) low & 0xffffffff);
31115 else
31117 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31118 fputs ("\t.long ", file);
31119 else
31120 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
31121 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
31123 return;
31127 if (GET_CODE (x) == CONST)
31129 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
31130 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
31132 base = XEXP (XEXP (x, 0), 0);
31133 offset = INTVAL (XEXP (XEXP (x, 0), 1));
31136 switch (GET_CODE (base))
31138 case SYMBOL_REF:
31139 name = XSTR (base, 0);
31140 break;
31142 case LABEL_REF:
31143 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
31144 CODE_LABEL_NUMBER (XEXP (base, 0)));
31145 break;
31147 case CODE_LABEL:
31148 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
31149 break;
31151 default:
31152 gcc_unreachable ();
31155 if (TARGET_ELF || TARGET_MINIMAL_TOC)
31156 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
31157 else
31159 fputs ("\t.tc ", file);
31160 RS6000_OUTPUT_BASENAME (file, name);
31162 if (offset < 0)
31163 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
31164 else if (offset)
31165 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
31167 /* Mark large TOC symbols on AIX with [TE] so they are mapped
31168 after other TOC symbols, reducing overflow of small TOC access
31169 to [TC] symbols. */
31170 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
31171 ? "[TE]," : "[TC],", file);
31174 /* Currently C++ toc references to vtables can be emitted before it
31175 is decided whether the vtable is public or private. If this is
31176 the case, then the linker will eventually complain that there is
31177 a TOC reference to an unknown section. Thus, for vtables only,
31178 we emit the TOC reference to reference the symbol and not the
31179 section. */
31180 if (VTABLE_NAME_P (name))
31182 RS6000_OUTPUT_BASENAME (file, name);
31183 if (offset < 0)
31184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
31185 else if (offset > 0)
31186 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
31188 else
31189 output_addr_const (file, x);
31191 #if HAVE_AS_TLS
31192 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
31194 switch (SYMBOL_REF_TLS_MODEL (base))
31196 case 0:
31197 break;
31198 case TLS_MODEL_LOCAL_EXEC:
31199 fputs ("@le", file);
31200 break;
31201 case TLS_MODEL_INITIAL_EXEC:
31202 fputs ("@ie", file);
31203 break;
31204 /* Use global-dynamic for local-dynamic. */
31205 case TLS_MODEL_GLOBAL_DYNAMIC:
31206 case TLS_MODEL_LOCAL_DYNAMIC:
31207 putc ('\n', file);
31208 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
31209 fputs ("\t.tc .", file);
31210 RS6000_OUTPUT_BASENAME (file, name);
31211 fputs ("[TC],", file);
31212 output_addr_const (file, x);
31213 fputs ("@m", file);
31214 break;
31215 default:
31216 gcc_unreachable ();
31219 #endif
31221 putc ('\n', file);
31224 /* Output an assembler pseudo-op to write an ASCII string of N characters
31225 starting at P to FILE.
31227 On the RS/6000, we have to do this using the .byte operation and
31228 write out special characters outside the quoted string.
31229 Also, the assembler is broken; very long strings are truncated,
31230 so we must artificially break them up early. */
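/* For example, the three bytes 'a', '\0', 'b' are emitted as
	.byte "a"
	.byte 0
	.byte "b"
   using the quoted form for printable characters and the decimal form
   for everything else.  */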
31232 void
31233 output_ascii (FILE *file, const char *p, int n)
31235 char c;
31236 int i, count_string;
31237 const char *for_string = "\t.byte \"";
31238 const char *for_decimal = "\t.byte ";
31239 const char *to_close = NULL;
31241 count_string = 0;
31242 for (i = 0; i < n; i++)
31244 c = *p++;
31245 if (c >= ' ' && c < 0177)
31247 if (for_string)
31248 fputs (for_string, file);
31249 putc (c, file);
31251 /* Write two quotes to get one. */
31252 if (c == '"')
31254 putc (c, file);
31255 ++count_string;
31258 for_string = NULL;
31259 for_decimal = "\"\n\t.byte ";
31260 to_close = "\"\n";
31261 ++count_string;
31263 if (count_string >= 512)
31265 fputs (to_close, file);
31267 for_string = "\t.byte \"";
31268 for_decimal = "\t.byte ";
31269 to_close = NULL;
31270 count_string = 0;
31273 else
31275 if (for_decimal)
31276 fputs (for_decimal, file);
31277 fprintf (file, "%d", c);
31279 for_string = "\n\t.byte \"";
31280 for_decimal = ", ";
31281 to_close = "\n";
31282 count_string = 0;
31286 /* Now close the string if we have written one. Then end the line. */
31287 if (to_close)
31288 fputs (to_close, file);
31291 /* Generate a unique section name for FILENAME for a section type
31292 represented by SECTION_DESC. Output goes into BUF.
31294 SECTION_DESC can be any string, as long as it is different for each
31295 possible section type.
31297 We name the section in the same manner as xlc. The name begins with an
31298 underscore followed by the filename (after stripping any leading directory
31299 names) with the last period replaced by the string SECTION_DESC. If
31300 FILENAME does not contain a period, SECTION_DESC is appended to the end of
31301 the name. */
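/* For example, FILENAME "src/my-file.c" with SECTION_DESC "bss"
   yields "_myfilebss": the directory part is stripped, non-alphanumeric
   characters are dropped, and the last period is replaced by
   SECTION_DESC.  */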
31303 void
31304 rs6000_gen_section_name (char **buf, const char *filename,
31305 const char *section_desc)
31307 const char *q, *after_last_slash, *last_period = 0;
31308 char *p;
31309 int len;
31311 after_last_slash = filename;
31312 for (q = filename; *q; q++)
31314 if (*q == '/')
31315 after_last_slash = q + 1;
31316 else if (*q == '.')
31317 last_period = q;
31320 len = strlen (after_last_slash) + strlen (section_desc) + 2;
31321 *buf = (char *) xmalloc (len);
31323 p = *buf;
31324 *p++ = '_';
31326 for (q = after_last_slash; *q; q++)
31328 if (q == last_period)
31330 strcpy (p, section_desc);
31331 p += strlen (section_desc);
31332 break;
31335 else if (ISALNUM (*q))
31336 *p++ = *q;
31339 if (last_period == 0)
31340 strcpy (p, section_desc);
31341 else
31342 *p = '\0';
31345 /* Emit profile function. */
31347 void
31348 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
31350 /* Non-standard profiling for kernels, which just saves LR then calls
31351 _mcount without worrying about arg saves. The idea is to change
31352 the function prologue as little as possible as it isn't easy to
31353 account for arg save/restore code added just for _mcount. */
31354 if (TARGET_PROFILE_KERNEL)
31355 return;
31357 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31359 #ifndef NO_PROFILE_COUNTERS
31360 # define NO_PROFILE_COUNTERS 0
31361 #endif
31362 if (NO_PROFILE_COUNTERS)
31363 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31364 LCT_NORMAL, VOIDmode, 0);
31365 else
31367 char buf[30];
31368 const char *label_name;
31369 rtx fun;
31371 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31372 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
31373 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
31375 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
31376 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
31379 else if (DEFAULT_ABI == ABI_DARWIN)
31381 const char *mcount_name = RS6000_MCOUNT;
31382 int caller_addr_regno = LR_REGNO;
31384 /* Be conservative and always set this, at least for now. */
31385 crtl->uses_pic_offset_table = 1;
31387 #if TARGET_MACHO
31388 /* For PIC code, set up a stub and collect the caller's address
31389 from r0, which is where the prologue puts it. */
31390 if (MACHOPIC_INDIRECT
31391 && crtl->uses_pic_offset_table)
31392 caller_addr_regno = 0;
31393 #endif
31394 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
31395 LCT_NORMAL, VOIDmode, 1,
31396 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
31400 /* Write function profiler code. */
31402 void
31403 output_function_profiler (FILE *file, int labelno)
31405 char buf[100];
31407 switch (DEFAULT_ABI)
31409 default:
31410 gcc_unreachable ();
31412 case ABI_V4:
31413 if (!TARGET_32BIT)
31415 warning (0, "no profiling of 64-bit code for this ABI");
31416 return;
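/* Each sequence below saves LR at 4(r1), the 32-bit SVR4 link-register
   save word, and (except for the no-counter case) computes the address
   of the LP counter label that is passed to _mcount.  */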
31418 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
31419 fprintf (file, "\tmflr %s\n", reg_names[0]);
31420 if (NO_PROFILE_COUNTERS)
31422 asm_fprintf (file, "\tstw %s,4(%s)\n",
31423 reg_names[0], reg_names[1]);
31425 else if (TARGET_SECURE_PLT && flag_pic)
31427 if (TARGET_LINK_STACK)
31429 char name[32];
31430 get_ppc476_thunk_name (name);
31431 asm_fprintf (file, "\tbl %s\n", name);
31433 else
31434 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
31435 asm_fprintf (file, "\tstw %s,4(%s)\n",
31436 reg_names[0], reg_names[1]);
31437 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31438 asm_fprintf (file, "\taddis %s,%s,",
31439 reg_names[12], reg_names[12]);
31440 assemble_name (file, buf);
31441 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
31442 assemble_name (file, buf);
31443 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
31445 else if (flag_pic == 1)
31447 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
31448 asm_fprintf (file, "\tstw %s,4(%s)\n",
31449 reg_names[0], reg_names[1]);
31450 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
31451 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
31452 assemble_name (file, buf);
31453 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
31455 else if (flag_pic > 1)
31457 asm_fprintf (file, "\tstw %s,4(%s)\n",
31458 reg_names[0], reg_names[1]);
31459 /* Now, we need to get the address of the label. */
31460 if (TARGET_LINK_STACK)
31462 char name[32];
31463 get_ppc476_thunk_name (name);
31464 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
31465 assemble_name (file, buf);
31466 fputs ("-.\n1:", file);
31467 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31468 asm_fprintf (file, "\taddi %s,%s,4\n",
31469 reg_names[11], reg_names[11]);
31471 else
31473 fputs ("\tbcl 20,31,1f\n\t.long ", file);
31474 assemble_name (file, buf);
31475 fputs ("-.\n1:", file);
31476 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
31478 asm_fprintf (file, "\tlwz %s,0(%s)\n",
31479 reg_names[0], reg_names[11]);
31480 asm_fprintf (file, "\tadd %s,%s,%s\n",
31481 reg_names[0], reg_names[0], reg_names[11]);
31483 else
31485 asm_fprintf (file, "\tlis %s,", reg_names[12]);
31486 assemble_name (file, buf);
31487 fputs ("@ha\n", file);
31488 asm_fprintf (file, "\tstw %s,4(%s)\n",
31489 reg_names[0], reg_names[1]);
31490 asm_fprintf (file, "\tla %s,", reg_names[0]);
31491 assemble_name (file, buf);
31492 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
31495 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
31496 fprintf (file, "\tbl %s%s\n",
31497 RS6000_MCOUNT, flag_pic ? "@plt" : "");
31498 break;
31500 case ABI_AIX:
31501 case ABI_ELFv2:
31502 case ABI_DARWIN:
31503 /* Don't do anything, done in output_profile_hook (). */
31504 break;
31510 /* The following variable value is the last issued insn. */
31512 static rtx_insn *last_scheduled_insn;
31514 /* The following variable helps to balance the issuing of load and
31515 store instructions.  */
31517 static int load_store_pendulum;
31519 /* The following variable helps pair divide insns during scheduling. */
31520 static int divide_cnt;
31521 /* The following variable helps pair and alternate vector and vector load
31522 insns during scheduling. */
31523 static int vec_load_pendulum;
31526 /* Power4 load update and store update instructions are cracked into a
31527 load or store and an integer insn which are executed in the same cycle.
31528 Branches have their own dispatch slot which does not count against the
31529 GCC issue rate, but it changes the program flow so there are no other
31530 instructions to issue in this cycle. */
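/* The result below is also cached in cached_can_issue_more so the
   sched_reorder2 hooks can report the remaining issue slots without
   recomputing them.  */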
31532 static int
31533 rs6000_variable_issue_1 (rtx_insn *insn, int more)
31535 last_scheduled_insn = insn;
31536 if (GET_CODE (PATTERN (insn)) == USE
31537 || GET_CODE (PATTERN (insn)) == CLOBBER)
31539 cached_can_issue_more = more;
31540 return cached_can_issue_more;
31543 if (insn_terminates_group_p (insn, current_group))
31545 cached_can_issue_more = 0;
31546 return cached_can_issue_more;
31549 /* If the insn has no reservation (not recognized), leave the issue count unchanged. */
31550 if (recog_memoized (insn) < 0)
31551 return more;
31553 if (rs6000_sched_groups)
31555 if (is_microcoded_insn (insn))
31556 cached_can_issue_more = 0;
31557 else if (is_cracked_insn (insn))
31558 cached_can_issue_more = more > 2 ? more - 2 : 0;
31559 else
31560 cached_can_issue_more = more - 1;
31562 return cached_can_issue_more;
31565 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
31566 return 0;
31568 cached_can_issue_more = more - 1;
31569 return cached_can_issue_more;
31572 static int
31573 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
31575 int r = rs6000_variable_issue_1 (insn, more);
31576 if (verbose)
31577 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
31578 return r;
31581 /* Adjust the cost of a scheduling dependency. Return the new cost of
31582 a dependency of kind DEP_TYPE between INSN and DEP_INSN.  COST is the current cost. */
31584 static int
31585 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
31586 unsigned int)
31588 enum attr_type attr_type;
31590 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
31591 return cost;
31593 switch (dep_type)
31595 case REG_DEP_TRUE:
31597 /* Data dependency; DEP_INSN writes a register that INSN reads
31598 some cycles later. */
31600 /* Separate a load from a narrower, dependent store. */
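/* (A load wider than a pending dependent store cannot be satisfied by
   store forwarding, so keeping the two apart avoids an expensive
   stall.)  */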
31601 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
31602 && GET_CODE (PATTERN (insn)) == SET
31603 && GET_CODE (PATTERN (dep_insn)) == SET
31604 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
31605 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
31606 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
31607 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
31608 return cost + 14;
31610 attr_type = get_attr_type (insn);
31612 switch (attr_type)
31614 case TYPE_JMPREG:
31615 /* Tell the first scheduling pass about the latency between
31616 a mtctr and bctr (and mtlr and br/blr). The first
31617 scheduling pass will not know about this latency since
31618 the mtctr instruction, which has the latency associated
31619 to it, will be generated by reload. */
31620 return 4;
31621 case TYPE_BRANCH:
31622 /* Leave some extra cycles between a compare and its
31623 dependent branch, to inhibit expensive mispredicts. */
31624 if ((rs6000_cpu_attr == CPU_PPC603
31625 || rs6000_cpu_attr == CPU_PPC604
31626 || rs6000_cpu_attr == CPU_PPC604E
31627 || rs6000_cpu_attr == CPU_PPC620
31628 || rs6000_cpu_attr == CPU_PPC630
31629 || rs6000_cpu_attr == CPU_PPC750
31630 || rs6000_cpu_attr == CPU_PPC7400
31631 || rs6000_cpu_attr == CPU_PPC7450
31632 || rs6000_cpu_attr == CPU_PPCE5500
31633 || rs6000_cpu_attr == CPU_PPCE6500
31634 || rs6000_cpu_attr == CPU_POWER4
31635 || rs6000_cpu_attr == CPU_POWER5
31636 || rs6000_cpu_attr == CPU_POWER7
31637 || rs6000_cpu_attr == CPU_POWER8
31638 || rs6000_cpu_attr == CPU_POWER9
31639 || rs6000_cpu_attr == CPU_CELL)
31640 && recog_memoized (dep_insn)
31641 && (INSN_CODE (dep_insn) >= 0))
31643 switch (get_attr_type (dep_insn))
31645 case TYPE_CMP:
31646 case TYPE_FPCOMPARE:
31647 case TYPE_CR_LOGICAL:
31648 case TYPE_DELAYED_CR:
31649 return cost + 2;
31650 case TYPE_EXTS:
31651 case TYPE_MUL:
31652 if (get_attr_dot (dep_insn) == DOT_YES)
31653 return cost + 2;
31654 else
31655 break;
31656 case TYPE_SHIFT:
31657 if (get_attr_dot (dep_insn) == DOT_YES
31658 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
31659 return cost + 2;
31660 else
31661 break;
31662 default:
31663 break;
31665 break;
31667 case TYPE_STORE:
31668 case TYPE_FPSTORE:
31669 if ((rs6000_cpu == PROCESSOR_POWER6)
31670 && recog_memoized (dep_insn)
31671 && (INSN_CODE (dep_insn) >= 0))
31674 if (GET_CODE (PATTERN (insn)) != SET)
31675 /* If this happens, we have to extend this to schedule
31676 optimally. Return default for now. */
31677 return cost;
31679 /* Adjust the cost for the case where the value written
31680 by a fixed point operation is used as the address
31681 gen value on a store. */
31682 switch (get_attr_type (dep_insn))
31684 case TYPE_LOAD:
31685 case TYPE_CNTLZ:
31687 if (! store_data_bypass_p (dep_insn, insn))
31688 return get_attr_sign_extend (dep_insn)
31689 == SIGN_EXTEND_YES ? 6 : 4;
31690 break;
31692 case TYPE_SHIFT:
31694 if (! store_data_bypass_p (dep_insn, insn))
31695 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31696 6 : 3;
31697 break;
31699 case TYPE_INTEGER:
31700 case TYPE_ADD:
31701 case TYPE_LOGICAL:
31702 case TYPE_EXTS:
31703 case TYPE_INSERT:
31705 if (! store_data_bypass_p (dep_insn, insn))
31706 return 3;
31707 break;
31709 case TYPE_STORE:
31710 case TYPE_FPLOAD:
31711 case TYPE_FPSTORE:
31713 if (get_attr_update (dep_insn) == UPDATE_YES
31714 && ! store_data_bypass_p (dep_insn, insn))
31715 return 3;
31716 break;
31718 case TYPE_MUL:
31720 if (! store_data_bypass_p (dep_insn, insn))
31721 return 17;
31722 break;
31724 case TYPE_DIV:
31726 if (! store_data_bypass_p (dep_insn, insn))
31727 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31728 break;
31730 default:
31731 break;
31734 break;
31736 case TYPE_LOAD:
31737 if ((rs6000_cpu == PROCESSOR_POWER6)
31738 && recog_memoized (dep_insn)
31739 && (INSN_CODE (dep_insn) >= 0))
31742 /* Adjust the cost for the case where the value written
31743 by a fixed point instruction is used within the address
31744 gen portion of a subsequent load(u)(x) */
31745 switch (get_attr_type (dep_insn))
31747 case TYPE_LOAD:
31748 case TYPE_CNTLZ:
31750 if (set_to_load_agen (dep_insn, insn))
31751 return get_attr_sign_extend (dep_insn)
31752 == SIGN_EXTEND_YES ? 6 : 4;
31753 break;
31755 case TYPE_SHIFT:
31757 if (set_to_load_agen (dep_insn, insn))
31758 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31759 6 : 3;
31760 break;
31762 case TYPE_INTEGER:
31763 case TYPE_ADD:
31764 case TYPE_LOGICAL:
31765 case TYPE_EXTS:
31766 case TYPE_INSERT:
31768 if (set_to_load_agen (dep_insn, insn))
31769 return 3;
31770 break;
31772 case TYPE_STORE:
31773 case TYPE_FPLOAD:
31774 case TYPE_FPSTORE:
31776 if (get_attr_update (dep_insn) == UPDATE_YES
31777 && set_to_load_agen (dep_insn, insn))
31778 return 3;
31779 break;
31781 case TYPE_MUL:
31783 if (set_to_load_agen (dep_insn, insn))
31784 return 17;
31785 break;
31787 case TYPE_DIV:
31789 if (set_to_load_agen (dep_insn, insn))
31790 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31791 break;
31793 default:
31794 break;
31797 break;
31799 case TYPE_FPLOAD:
31800 if ((rs6000_cpu == PROCESSOR_POWER6)
31801 && get_attr_update (insn) == UPDATE_NO
31802 && recog_memoized (dep_insn)
31803 && (INSN_CODE (dep_insn) >= 0)
31804 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
31805 return 2;
31807 default:
31808 break;
31811 /* Fall out to return default cost. */
31813 break;
31815 case REG_DEP_OUTPUT:
31816 /* Output dependency; DEP_INSN writes a register that INSN writes some
31817 cycles later. */
31818 if ((rs6000_cpu == PROCESSOR_POWER6)
31819 && recog_memoized (dep_insn)
31820 && (INSN_CODE (dep_insn) >= 0))
31822 attr_type = get_attr_type (insn);
31824 switch (attr_type)
31826 case TYPE_FP:
31827 case TYPE_FPSIMPLE:
31828 if (get_attr_type (dep_insn) == TYPE_FP
31829 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
31830 return 1;
31831 break;
31832 case TYPE_FPLOAD:
31833 if (get_attr_update (insn) == UPDATE_NO
31834 && get_attr_type (dep_insn) == TYPE_MFFGPR)
31835 return 2;
31836 break;
31837 default:
31838 break;
31841 /* Fall through, no cost for output dependency. */
31842 /* FALLTHRU */
31844 case REG_DEP_ANTI:
31845 /* Anti dependency; DEP_INSN reads a register that INSN writes some
31846 cycles later. */
31847 return 0;
31849 default:
31850 gcc_unreachable ();
31853 return cost;
31856 /* Debug version of rs6000_adjust_cost. */
31858 static int
31859 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
31860 int cost, unsigned int dw)
31862 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
31864 if (ret != cost)
31866 const char *dep;
31868 switch (dep_type)
31870 default: dep = "unknown dependency"; break;
31871 case REG_DEP_TRUE: dep = "data dependency"; break;
31872 case REG_DEP_OUTPUT: dep = "output dependency"; break;
31873 case REG_DEP_ANTI: dep = "anti dependency"; break;
31876 fprintf (stderr,
31877 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
31878 "%s, insn:\n", ret, cost, dep);
31880 debug_rtx (insn);
31883 return ret;
31886 /* Return true if INSN is microcoded.
31887 Return false otherwise.  */
31889 static bool
31890 is_microcoded_insn (rtx_insn *insn)
31892 if (!insn || !NONDEBUG_INSN_P (insn)
31893 || GET_CODE (PATTERN (insn)) == USE
31894 || GET_CODE (PATTERN (insn)) == CLOBBER)
31895 return false;
31897 if (rs6000_cpu_attr == CPU_CELL)
31898 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
31900 if (rs6000_sched_groups
31901 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31903 enum attr_type type = get_attr_type (insn);
31904 if ((type == TYPE_LOAD
31905 && get_attr_update (insn) == UPDATE_YES
31906 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
31907 || ((type == TYPE_LOAD || type == TYPE_STORE)
31908 && get_attr_update (insn) == UPDATE_YES
31909 && get_attr_indexed (insn) == INDEXED_YES)
31910 || type == TYPE_MFCR)
31911 return true;
31914 return false;
31917 /* The function returns true if INSN is cracked into 2 instructions
31918 by the processor (and therefore occupies 2 issue slots). */
31920 static bool
31921 is_cracked_insn (rtx_insn *insn)
31923 if (!insn || !NONDEBUG_INSN_P (insn)
31924 || GET_CODE (PATTERN (insn)) == USE
31925 || GET_CODE (PATTERN (insn)) == CLOBBER)
31926 return false;
31928 if (rs6000_sched_groups
31929 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31931 enum attr_type type = get_attr_type (insn);
31932 if ((type == TYPE_LOAD
31933 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31934 && get_attr_update (insn) == UPDATE_NO)
31935 || (type == TYPE_LOAD
31936 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
31937 && get_attr_update (insn) == UPDATE_YES
31938 && get_attr_indexed (insn) == INDEXED_NO)
31939 || (type == TYPE_STORE
31940 && get_attr_update (insn) == UPDATE_YES
31941 && get_attr_indexed (insn) == INDEXED_NO)
31942 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
31943 && get_attr_update (insn) == UPDATE_YES)
31944 || type == TYPE_DELAYED_CR
31945 || (type == TYPE_EXTS
31946 && get_attr_dot (insn) == DOT_YES)
31947 || (type == TYPE_SHIFT
31948 && get_attr_dot (insn) == DOT_YES
31949 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
31950 || (type == TYPE_MUL
31951 && get_attr_dot (insn) == DOT_YES)
31952 || type == TYPE_DIV
31953 || (type == TYPE_INSERT
31954 && get_attr_size (insn) == SIZE_32))
31955 return true;
31958 return false;
31961 /* The function returns true if INSN can be issued only from
31962 the branch slot. */
31964 static bool
31965 is_branch_slot_insn (rtx_insn *insn)
31967 if (!insn || !NONDEBUG_INSN_P (insn)
31968 || GET_CODE (PATTERN (insn)) == USE
31969 || GET_CODE (PATTERN (insn)) == CLOBBER)
31970 return false;
31972 if (rs6000_sched_groups)
31974 enum attr_type type = get_attr_type (insn);
31975 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
31976 return true;
31977 return false;
31980 return false;
31983 /* Return true if OUT_INSN sets a value that is
31984 used in the address generation computation of IN_INSN.  */
31985 static bool
31986 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
31988 rtx out_set, in_set;
31990 /* For performance reasons, only handle the simple case where
31991 both loads are a single_set. */
31992 out_set = single_set (out_insn);
31993 if (out_set)
31995 in_set = single_set (in_insn);
31996 if (in_set)
31997 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
32000 return false;
32003 /* Try to determine base/offset/size parts of the given MEM.
32004 Return true if successful, false if any of the values could not
32005 be determined.
32007 This function only looks for REG or REG+CONST address forms.
32008 REG+REG address form will return false. */
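/* For example, (mem:DI (plus:DI (reg 9) (const_int 16))) with a known
   size yields *BASE = r9, *OFFSET = 16, *SIZE = 8.  */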
32010 static bool
32011 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
32012 HOST_WIDE_INT *size)
32014 rtx addr_rtx;
32015 if (MEM_SIZE_KNOWN_P (mem))
32016 *size = MEM_SIZE (mem);
32017 else
32018 return false;
32020 addr_rtx = (XEXP (mem, 0));
32021 if (GET_CODE (addr_rtx) == PRE_MODIFY)
32022 addr_rtx = XEXP (addr_rtx, 1);
32024 *offset = 0;
32025 while (GET_CODE (addr_rtx) == PLUS
32026 && CONST_INT_P (XEXP (addr_rtx, 1)))
32028 *offset += INTVAL (XEXP (addr_rtx, 1));
32029 addr_rtx = XEXP (addr_rtx, 0);
32031 if (!REG_P (addr_rtx))
32032 return false;
32034 *base = addr_rtx;
32035 return true;
32038 /* Return true if the target storage location of
32039 MEM1 is adjacent to the target storage location of MEM2.  */
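/* For example, a 4-byte access at 16(r9) and a 4-byte access at
   20(r9) are adjacent in either order.  */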
32042 static bool
32043 adjacent_mem_locations (rtx mem1, rtx mem2)
32045 rtx reg1, reg2;
32046 HOST_WIDE_INT off1, size1, off2, size2;
32048 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32049 && get_memref_parts (mem2, &reg2, &off2, &size2))
32050 return ((REGNO (reg1) == REGNO (reg2))
32051 && ((off1 + size1 == off2)
32052 || (off2 + size2 == off1)));
32054 return false;
32057 /* This function returns true if it can be determined that the two MEM
32058 locations overlap by at least 1 byte based on base reg/offset/size. */
32060 static bool
32061 mem_locations_overlap (rtx mem1, rtx mem2)
32063 rtx reg1, reg2;
32064 HOST_WIDE_INT off1, size1, off2, size2;
32066 if (get_memref_parts (mem1, &reg1, &off1, &size1)
32067 && get_memref_parts (mem2, &reg2, &off2, &size2))
32068 return ((REGNO (reg1) == REGNO (reg2))
32069 && (((off1 <= off2) && (off1 + size1 > off2))
32070 || ((off2 <= off1) && (off2 + size2 > off1))));
32072 return false;
32075 /* A C statement (sans semicolon) to update the integer scheduling
32076 priority INSN_PRIORITY (INSN). Increase the priority to execute the
32077 INSN earlier, reduce the priority to execute INSN later. Do not
32078 define this macro if you do not need to adjust the scheduling
32079 priorities of insns. */
32081 static int
32082 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
32084 rtx load_mem, str_mem;
32085 /* On machines (like the 750) which have asymmetric integer units,
32086 where one integer unit can do multiply and divides and the other
32087 can't, reduce the priority of multiply/divide so it is scheduled
32088 before other integer operations. */
32090 #if 0
32091 if (! INSN_P (insn))
32092 return priority;
32094 if (GET_CODE (PATTERN (insn)) == USE)
32095 return priority;
32097 switch (rs6000_cpu_attr) {
32098 case CPU_PPC750:
32099 switch (get_attr_type (insn))
32101 default:
32102 break;
32104 case TYPE_MUL:
32105 case TYPE_DIV:
32106 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
32107 priority, priority);
32108 if (priority >= 0 && priority < 0x01000000)
32109 priority >>= 3;
32110 break;
32113 #endif
32115 if (insn_must_be_first_in_group (insn)
32116 && reload_completed
32117 && current_sched_info->sched_max_insns_priority
32118 && rs6000_sched_restricted_insns_priority)
32121 /* Prioritize insns that can be dispatched only in the first
32122 dispatch slot. */
32123 if (rs6000_sched_restricted_insns_priority == 1)
32124 /* Attach highest priority to insn. This means that in
32125 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
32126 precede 'priority' (critical path) considerations. */
32127 return current_sched_info->sched_max_insns_priority;
32128 else if (rs6000_sched_restricted_insns_priority == 2)
32129 /* Increase priority of insn by a minimal amount. This means that in
32130 haifa-sched.c:ready_sort(), only 'priority' (critical path)
32131 considerations precede dispatch-slot restriction considerations. */
32132 return (priority + 1);
32135 if (rs6000_cpu == PROCESSOR_POWER6
32136 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
32137 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
32138 /* Attach highest priority to insn if the scheduler has just issued two
32139 stores and this instruction is a load, or two loads and this instruction
32140 is a store. Power6 wants loads and stores scheduled alternately
32141 when possible */
32142 return current_sched_info->sched_max_insns_priority;
32144 return priority;
32147 /* Return true if the instruction is nonpipelined on the Cell. */
32148 static bool
32149 is_nonpipeline_insn (rtx_insn *insn)
32151 enum attr_type type;
32152 if (!insn || !NONDEBUG_INSN_P (insn)
32153 || GET_CODE (PATTERN (insn)) == USE
32154 || GET_CODE (PATTERN (insn)) == CLOBBER)
32155 return false;
32157 type = get_attr_type (insn);
32158 if (type == TYPE_MUL
32159 || type == TYPE_DIV
32160 || type == TYPE_SDIV
32161 || type == TYPE_DDIV
32162 || type == TYPE_SSQRT
32163 || type == TYPE_DSQRT
32164 || type == TYPE_MFCR
32165 || type == TYPE_MFCRF
32166 || type == TYPE_MFJMPR)
32168 return true;
32170 return false;
32174 /* Return how many instructions the machine can issue per cycle. */
32176 static int
32177 rs6000_issue_rate (void)
32179 /* Unless scheduling for register pressure, use issue rate of 1 for
32180 the first scheduling pass, to reduce degradation. */
32181 if (!reload_completed && !flag_sched_pressure)
32182 return 1;
32184 switch (rs6000_cpu_attr) {
32185 case CPU_RS64A:
32186 case CPU_PPC601: /* ? */
32187 case CPU_PPC7450:
32188 return 3;
32189 case CPU_PPC440:
32190 case CPU_PPC603:
32191 case CPU_PPC750:
32192 case CPU_PPC7400:
32193 case CPU_PPC8540:
32194 case CPU_PPC8548:
32195 case CPU_CELL:
32196 case CPU_PPCE300C2:
32197 case CPU_PPCE300C3:
32198 case CPU_PPCE500MC:
32199 case CPU_PPCE500MC64:
32200 case CPU_PPCE5500:
32201 case CPU_PPCE6500:
32202 case CPU_TITAN:
32203 return 2;
32204 case CPU_PPC476:
32205 case CPU_PPC604:
32206 case CPU_PPC604E:
32207 case CPU_PPC620:
32208 case CPU_PPC630:
32209 return 4;
32210 case CPU_POWER4:
32211 case CPU_POWER5:
32212 case CPU_POWER6:
32213 case CPU_POWER7:
32214 return 5;
32215 case CPU_POWER8:
32216 return 7;
32217 case CPU_POWER9:
32218 return 6;
32219 default:
32220 return 1;
32224 /* Return how many instructions to look ahead for better insn
32225 scheduling. */
32227 static int
32228 rs6000_use_sched_lookahead (void)
32230 switch (rs6000_cpu_attr)
32232 case CPU_PPC8540:
32233 case CPU_PPC8548:
32234 return 4;
32236 case CPU_CELL:
32237 return (reload_completed ? 8 : 0);
32239 default:
32240 return 0;
32244 /* We are choosing insn from the ready queue. Return zero if INSN can be
32245 chosen. */
32246 static int
32247 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
32249 if (ready_index == 0)
32250 return 0;
32252 if (rs6000_cpu_attr != CPU_CELL)
32253 return 0;
32255 gcc_assert (insn != NULL_RTX && INSN_P (insn));
32257 if (!reload_completed
32258 || is_nonpipeline_insn (insn)
32259 || is_microcoded_insn (insn))
32260 return 1;
32262 return 0;
32265 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
32266 and return true. */
32268 static bool
32269 find_mem_ref (rtx pat, rtx *mem_ref)
32271 const char * fmt;
32272 int i, j;
32274 /* stack_tie does not produce any real memory traffic. */
32275 if (tie_operand (pat, VOIDmode))
32276 return false;
32278 if (GET_CODE (pat) == MEM)
32280 *mem_ref = pat;
32281 return true;
32284 /* Recursively process the pattern. */
32285 fmt = GET_RTX_FORMAT (GET_CODE (pat));
32287 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
32289 if (fmt[i] == 'e')
32291 if (find_mem_ref (XEXP (pat, i), mem_ref))
32292 return true;
32294 else if (fmt[i] == 'E')
32295 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
32297 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
32298 return true;
32302 return false;
32305 /* Determine if PAT is a PATTERN of a load insn. */
32307 static bool
32308 is_load_insn1 (rtx pat, rtx *load_mem)
32310 if (!pat)
32311 return false;
32313 if (GET_CODE (pat) == SET)
32314 return find_mem_ref (SET_SRC (pat), load_mem);
32316 if (GET_CODE (pat) == PARALLEL)
32318 int i;
32320 for (i = 0; i < XVECLEN (pat, 0); i++)
32321 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
32322 return true;
32325 return false;
32328 /* Determine if INSN loads from memory. */
32330 static bool
32331 is_load_insn (rtx insn, rtx *load_mem)
32333 if (!insn || !INSN_P (insn))
32334 return false;
32336 if (CALL_P (insn))
32337 return false;
32339 return is_load_insn1 (PATTERN (insn), load_mem);
32342 /* Determine if PAT is a PATTERN of a store insn. */
32344 static bool
32345 is_store_insn1 (rtx pat, rtx *str_mem)
32347 if (!pat)
32348 return false;
32350 if (GET_CODE (pat) == SET)
32351 return find_mem_ref (SET_DEST (pat), str_mem);
32353 if (GET_CODE (pat) == PARALLEL)
32355 int i;
32357 for (i = 0; i < XVECLEN (pat, 0); i++)
32358 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
32359 return true;
32362 return false;
32365 /* Determine if INSN stores to memory. */
32367 static bool
32368 is_store_insn (rtx insn, rtx *str_mem)
32370 if (!insn || !INSN_P (insn))
32371 return false;
32373 return is_store_insn1 (PATTERN (insn), str_mem);
32376 /* Return whether TYPE is a Power9 pairable vector instruction type. */
32378 static bool
32379 is_power9_pairable_vec_type (enum attr_type type)
32381 switch (type)
32383 case TYPE_VECSIMPLE:
32384 case TYPE_VECCOMPLEX:
32385 case TYPE_VECDIV:
32386 case TYPE_VECCMP:
32387 case TYPE_VECPERM:
32388 case TYPE_VECFLOAT:
32389 case TYPE_VECFDIV:
32390 case TYPE_VECDOUBLE:
32391 return true;
32392 default:
32393 break;
32395 return false;
32398 /* Returns whether the dependence between INSN and NEXT is considered
32399 costly by the given target. */
32401 static bool
32402 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
32404 rtx insn;
32405 rtx next;
32406 rtx load_mem, str_mem;
32408 /* If the flag is not enabled, no dependence is considered costly;
32409 allow all dependent insns in the same group.
32410 This is the most aggressive option. */
32411 if (rs6000_sched_costly_dep == no_dep_costly)
32412 return false;
32414 /* If the flag is set to 1, a dependence is always considered costly;
32415 do not allow dependent instructions in the same group.
32416 This is the most conservative option. */
32417 if (rs6000_sched_costly_dep == all_deps_costly)
32418 return true;
32420 insn = DEP_PRO (dep);
32421 next = DEP_CON (dep);
32423 if (rs6000_sched_costly_dep == store_to_load_dep_costly
32424 && is_load_insn (next, &load_mem)
32425 && is_store_insn (insn, &str_mem))
32426 /* Prevent load after store in the same group. */
32427 return true;
32429 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
32430 && is_load_insn (next, &load_mem)
32431 && is_store_insn (insn, &str_mem)
32432 && DEP_TYPE (dep) == REG_DEP_TRUE
32433 && mem_locations_overlap(str_mem, load_mem))
32434 /* Prevent load after store in the same group if it is a true
32435 dependence. */
32436 return true;
32438 /* The flag is set to X; dependences with latency >= X are considered costly,
32439 and will not be scheduled in the same group. */
32440 if (rs6000_sched_costly_dep <= max_dep_latency
32441 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
32442 return true;
32444 return false;
32447 /* Return the next insn after INSN that is found before TAIL is reached,
32448 skipping any "non-active" insns - insns that will not actually occupy
32449 an issue slot. Return NULL_RTX if such an insn is not found. */
32451 static rtx_insn *
32452 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
32454 if (insn == NULL_RTX || insn == tail)
32455 return NULL;
32457 while (1)
32459 insn = NEXT_INSN (insn);
32460 if (insn == NULL_RTX || insn == tail)
32461 return NULL;
32463 if (CALL_P (insn)
32464 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
32465 || (NONJUMP_INSN_P (insn)
32466 && GET_CODE (PATTERN (insn)) != USE
32467 && GET_CODE (PATTERN (insn)) != CLOBBER
32468 && INSN_CODE (insn) != CODE_FOR_stack_tie))
32469 break;
32471 return insn;
32474 /* Do Power9-specific sched_reorder2 reordering of the ready list. */
32476 static int
32477 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
32479 int pos;
32480 int i;
32481 rtx_insn *tmp;
32482 enum attr_type type;
32484 type = get_attr_type (last_scheduled_insn);
32486 /* Try to issue fixed point divides back-to-back in pairs so they will be
32487 routed to separate execution units and execute in parallel. */
32488 if (type == TYPE_DIV && divide_cnt == 0)
32490 /* First divide has been scheduled. */
32491 divide_cnt = 1;
32493 /* Scan the ready list looking for another divide, if found move it
32494 to the end of the list so it is chosen next. */
32495 pos = lastpos;
32496 while (pos >= 0)
32498 if (recog_memoized (ready[pos]) >= 0
32499 && get_attr_type (ready[pos]) == TYPE_DIV)
32501 tmp = ready[pos];
32502 for (i = pos; i < lastpos; i++)
32503 ready[i] = ready[i + 1];
32504 ready[lastpos] = tmp;
32505 break;
32507 pos--;
32510 else
32512 /* Last insn was the 2nd divide or not a divide, reset the counter. */
32513 divide_cnt = 0;
32515 /* Power9 can execute 2 vector operations and 2 vector loads in a single
32516 cycle. So try to pair up and alternate groups of vector and vector
32517 load instructions.
32519 To aid this formation, a counter is maintained to keep track of
32520 vec/vecload insns issued. The value of vec_load_pendulum maintains
32521 the current state with the following values:
32523 0 : Initial state, no vec/vecload group has been started.
32525 -1 : 1 vector load has been issued and another has been found on
32526 the ready list and moved to the end.
32528 -2 : 2 vector loads have been issued and a vector operation has
32529 been found and moved to the end of the ready list.
32531 -3 : 2 vector loads and a vector insn have been issued and a
32532 vector operation has been found and moved to the end of the
32533 ready list.
32535 1 : 1 vector insn has been issued and another has been found and
32536 moved to the end of the ready list.
32538 2 : 2 vector insns have been issued and a vector load has been
32539 found and moved to the end of the ready list.
32541 3 : 2 vector insns and a vector load have been issued and another
32542 vector load has been found and moved to the end of the ready
32543 list. */
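/* The pattern encouraged is therefore either vecload, vecload, vec,
   vec (states 0 -> -1 -> -2 -> -3) or vec, vec, vecload, vecload
   (states 0 -> 1 -> 2 -> 3), resetting to 0 when a group completes
   or cannot be continued.  */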
32544 if (type == TYPE_VECLOAD)
32546 /* Issued a vecload. */
32547 if (vec_load_pendulum == 0)
32549 /* We issued a single vecload, look for another and move it to
32550 the end of the ready list so it will be scheduled next.
32551 Set pendulum if found. */
32552 pos = lastpos;
32553 while (pos >= 0)
32555 if (recog_memoized (ready[pos]) >= 0
32556 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32558 tmp = ready[pos];
32559 for (i = pos; i < lastpos; i++)
32560 ready[i] = ready[i + 1];
32561 ready[lastpos] = tmp;
32562 vec_load_pendulum = -1;
32563 return cached_can_issue_more;
32565 pos--;
32568 else if (vec_load_pendulum == -1)
32570 /* This is the second vecload we've issued, search the ready
32571 list for a vector operation so we can try to schedule a
32572 pair of those next. If found move to the end of the ready
32573 list so it is scheduled next and set the pendulum. */
32574 pos = lastpos;
32575 while (pos >= 0)
32577 if (recog_memoized (ready[pos]) >= 0
32578 && is_power9_pairable_vec_type (
32579 get_attr_type (ready[pos])))
32581 tmp = ready[pos];
32582 for (i = pos; i < lastpos; i++)
32583 ready[i] = ready[i + 1];
32584 ready[lastpos] = tmp;
32585 vec_load_pendulum = -2;
32586 return cached_can_issue_more;
32588 pos--;
32591 else if (vec_load_pendulum == 2)
32593 /* Two vector ops have been issued and we've just issued a
32594 vecload, look for another vecload and move to end of ready
32595 list if found. */
32596 pos = lastpos;
32597 while (pos >= 0)
32599 if (recog_memoized (ready[pos]) >= 0
32600 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32602 tmp = ready[pos];
32603 for (i = pos; i < lastpos; i++)
32604 ready[i] = ready[i + 1];
32605 ready[lastpos] = tmp;
32606 /* Set pendulum so that next vecload will be seen as
32607 finishing a group, not start of one. */
32608 vec_load_pendulum = 3;
32609 return cached_can_issue_more;
32611 pos--;
32615 else if (is_power9_pairable_vec_type (type))
32617 /* Issued a vector operation. */
32618 if (vec_load_pendulum == 0)
32619 /* We issued a single vec op, look for another and move it
32620 to the end of the ready list so it will be scheduled next.
32621 Set pendulum if found. */
32623 pos = lastpos;
32624 while (pos >= 0)
32626 if (recog_memoized (ready[pos]) >= 0
32627 && is_power9_pairable_vec_type (
32628 get_attr_type (ready[pos])))
32630 tmp = ready[pos];
32631 for (i = pos; i < lastpos; i++)
32632 ready[i] = ready[i + 1];
32633 ready[lastpos] = tmp;
32634 vec_load_pendulum = 1;
32635 return cached_can_issue_more;
32637 pos--;
32640 else if (vec_load_pendulum == 1)
32642 /* This is the second vec op we've issued, search the ready
32643 list for a vecload operation so we can try to schedule a
32644 pair of those next. If found move to the end of the ready
32645 list so it is scheduled next and set the pendulum. */
32646 pos = lastpos;
32647 while (pos >= 0)
32649 if (recog_memoized (ready[pos]) >= 0
32650 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32652 tmp = ready[pos];
32653 for (i = pos; i < lastpos; i++)
32654 ready[i] = ready[i + 1];
32655 ready[lastpos] = tmp;
32656 vec_load_pendulum = 2;
32657 return cached_can_issue_more;
32659 pos--;
32662 else if (vec_load_pendulum == -2)
32664 /* Two vecload ops have been issued and we've just issued a
32665 vec op, look for another vec op and move to end of ready
32666 list if found. */
32667 pos = lastpos;
32668 while (pos >= 0)
32670 if (recog_memoized (ready[pos]) >= 0
32671 && is_power9_pairable_vec_type (
32672 get_attr_type (ready[pos])))
32674 tmp = ready[pos];
32675 for (i = pos; i < lastpos; i++)
32676 ready[i] = ready[i + 1];
32677 ready[lastpos] = tmp;
32678 /* Set pendulum so that next vec op will be seen as
32679 finishing a group, not start of one. */
32680 vec_load_pendulum = -3;
32681 return cached_can_issue_more;
32683 pos--;
32688 /* We've either finished a vec/vecload group, couldn't find an insn to
32689 continue the current group, or the last insn had nothing to do
32690 with a group.  In any case, reset the pendulum. */
32691 vec_load_pendulum = 0;
32694 return cached_can_issue_more;
32697 /* We are about to begin issuing insns for this clock cycle. */
32699 static int
32700 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
32701 rtx_insn **ready ATTRIBUTE_UNUSED,
32702 int *pn_ready ATTRIBUTE_UNUSED,
32703 int clock_var ATTRIBUTE_UNUSED)
32705 int n_ready = *pn_ready;
32707 if (sched_verbose)
32708 fprintf (dump, "// rs6000_sched_reorder :\n");
32710 /* Reorder the ready list, if the next insn to be issued
32711 is a nonpipeline insn.  */
32712 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
32714 if (is_nonpipeline_insn (ready[n_ready - 1])
32715 && (recog_memoized (ready[n_ready - 2]) > 0))
32716 /* Simply swap first two insns. */
32717 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
32720 if (rs6000_cpu == PROCESSOR_POWER6)
32721 load_store_pendulum = 0;
32723 return rs6000_issue_rate ();
32726 /* Like rs6000_sched_reorder, but called after issuing each insn. */
32728 static int
32729 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
32730 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
32732 if (sched_verbose)
32733 fprintf (dump, "// rs6000_sched_reorder2 :\n");
32735 /* For Power6, we need to handle some special cases to try and keep the
32736 store queue from overflowing and triggering expensive flushes.
32738 This code monitors how load and store instructions are being issued
32739 and skews the ready list one way or the other to increase the likelihood
32740 that a desired instruction is issued at the proper time.
32742 A couple of things are done. First, we maintain a "load_store_pendulum"
32743 to track the current state of load/store issue.
32745 - If the pendulum is at zero, then no loads or stores have been
32746 issued in the current cycle so we do nothing.
32748 - If the pendulum is 1, then a single load has been issued in this
32749 cycle and we attempt to locate another load in the ready list to
32750 issue with it.
32752 - If the pendulum is -2, then two stores have already been
32753 issued in this cycle, so we increase the priority of the first load
32754 in the ready list to increase its likelihood of being chosen first
32755 in the next cycle.
32757 - If the pendulum is -1, then a single store has been issued in this
32758 cycle and we attempt to locate another store in the ready list to
32759 issue with it, preferring a store to an adjacent memory location to
32760 facilitate store pairing in the store queue.
32762 - If the pendulum is 2, then two loads have already been
32763 issued in this cycle, so we increase the priority of the first store
32764 in the ready list to increase its likelihood of being chosen first
32765 in the next cycle.
32767 - If the pendulum < -2 or > 2, then do nothing.
32769 Note: This code covers the most common scenarios. There exist non
32770 load/store instructions which make use of the LSU and which
32771 would need to be accounted for to strictly model the behavior
32772 of the machine. Those instructions are currently unaccounted
32773 for to help minimize compile time overhead of this code.
32775 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
32777 int pos;
32778 int i;
32779 rtx_insn *tmp;
32780 rtx load_mem, str_mem;
32782 if (is_store_insn (last_scheduled_insn, &str_mem))
32783 /* Issuing a store, swing the load_store_pendulum to the left */
32784 load_store_pendulum--;
32785 else if (is_load_insn (last_scheduled_insn, &load_mem))
32786 /* Issuing a load, swing the load_store_pendulum to the right */
32787 load_store_pendulum++;
32788 else
32789 return cached_can_issue_more;
32791 /* If the pendulum is balanced, or there is only one instruction on
32792 the ready list, then all is well, so return. */
32793 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
32794 return cached_can_issue_more;
32796 if (load_store_pendulum == 1)
32798 /* A load has been issued in this cycle. Scan the ready list
32799 for another load to issue with it */
32800 pos = *pn_ready-1;
32802 while (pos >= 0)
32804 if (is_load_insn (ready[pos], &load_mem))
32806 /* Found a load. Move it to the head of the ready list,
32807 and adjust its priority so that it is more likely to
32808 stay there */
32809 tmp = ready[pos];
32810 for (i=pos; i<*pn_ready-1; i++)
32811 ready[i] = ready[i + 1];
32812 ready[*pn_ready-1] = tmp;
32814 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32815 INSN_PRIORITY (tmp)++;
32816 break;
32818 pos--;
32821 else if (load_store_pendulum == -2)
32823 /* Two stores have been issued in this cycle. Increase the
32824 priority of the first load in the ready list to favor it for
32825 issuing in the next cycle. */
32826 pos = *pn_ready-1;
32828 while (pos >= 0)
32830 if (is_load_insn (ready[pos], &load_mem)
32831 && !sel_sched_p ()
32832 && INSN_PRIORITY_KNOWN (ready[pos]))
32834 INSN_PRIORITY (ready[pos])++;
32836 /* Adjust the pendulum to account for the fact that a load
32837 was found and increased in priority. This is to prevent
32838 increasing the priority of multiple loads */
32839 load_store_pendulum--;
32841 break;
32843 pos--;
32846 else if (load_store_pendulum == -1)
32848 /* A store has been issued in this cycle. Scan the ready list for
32849 another store to issue with it, preferring a store to an adjacent
32850 memory location */
32851 int first_store_pos = -1;
32853 pos = *pn_ready-1;
32855 while (pos >= 0)
32857 if (is_store_insn (ready[pos], &str_mem))
32859 rtx str_mem2;
32860 /* Maintain the index of the first store found on the
32861 list */
32862 if (first_store_pos == -1)
32863 first_store_pos = pos;
32865 if (is_store_insn (last_scheduled_insn, &str_mem2)
32866 && adjacent_mem_locations (str_mem, str_mem2))
32868 /* Found an adjacent store. Move it to the head of the
32869 ready list, and adjust its priority so that it is
32870 more likely to stay there */
32871 tmp = ready[pos];
32872 for (i=pos; i<*pn_ready-1; i++)
32873 ready[i] = ready[i + 1];
32874 ready[*pn_ready-1] = tmp;
32876 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32877 INSN_PRIORITY (tmp)++;
32879 first_store_pos = -1;
32881 break;
32884 pos--;
32887 if (first_store_pos >= 0)
32889 /* An adjacent store wasn't found, but a non-adjacent store was,
32890 so move the non-adjacent store to the front of the ready
32891 list, and adjust its priority so that it is more likely to
32892 stay there. */
32893 tmp = ready[first_store_pos];
32894 for (i=first_store_pos; i<*pn_ready-1; i++)
32895 ready[i] = ready[i + 1];
32896 ready[*pn_ready-1] = tmp;
32897 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32898 INSN_PRIORITY (tmp)++;
32901 else if (load_store_pendulum == 2)
32903 /* Two loads have been issued in this cycle. Increase the priority
32904 of the first store in the ready list to favor it for issuing in
32905 the next cycle. */
32906 pos = *pn_ready-1;
32908 while (pos >= 0)
32910 if (is_store_insn (ready[pos], &str_mem)
32911 && !sel_sched_p ()
32912 && INSN_PRIORITY_KNOWN (ready[pos]))
32914 INSN_PRIORITY (ready[pos])++;
32916 /* Adjust the pendulum to account for the fact that a store
32917 was found and increased in priority. This is to prevent
32918 increasing the priority of multiple stores */
32919 load_store_pendulum++;
32921 break;
32923 pos--;
32928 /* Do Power9 dependent reordering if necessary. */
32929 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
32930 && recog_memoized (last_scheduled_insn) >= 0)
32931 return power9_sched_reorder2 (ready, *pn_ready - 1);
32933 return cached_can_issue_more;
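
/* A minimal sketch (ours, not part of GCC) of the pendulum bookkeeping
   described above, distilled into a standalone helper. The name and the
   ISSUED_IS_LOAD / ISSUED_IS_STORE flags are illustrative inventions; in
   the real code the update is woven through rs6000_sched_reorder2.  */

static int
example_pendulum_update (int pendulum, int issued_is_load,
                         int issued_is_store)
{
  if (issued_is_load)
    pendulum++;		/* A load swings the pendulum right.  */
  else if (issued_is_store)
    pendulum--;		/* A store swings the pendulum left.  */

  /* +1/-1: try to pair a second load/store in this cycle;
     +2/-2: boost the first store/load on the ready list for next cycle;
     beyond +/-2: leave the ready list alone.  */
  return pendulum;
}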
32936 /* Return whether the presence of INSN causes a dispatch group termination
32937 of group WHICH_GROUP.
32939 If WHICH_GROUP == current_group, this function will return true if INSN
32940 causes the termination of the current group (i.e., the dispatch group to
32941 which INSN belongs). This means that INSN will be the last insn in the
32942 group it belongs to.
32944 If WHICH_GROUP == previous_group, this function will return true if INSN
32945 causes the termination of the previous group (i.e., the dispatch group that
32946 precedes the group to which INSN belongs). This means that INSN will be
32947 the first insn in the group it belongs to. */
32949 static bool
32950 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
32952 bool first, last;
32954 if (! insn)
32955 return false;
32957 first = insn_must_be_first_in_group (insn);
32958 last = insn_must_be_last_in_group (insn);
32960 if (first && last)
32961 return true;
32963 if (which_group == current_group)
32964 return last;
32965 else if (which_group == previous_group)
32966 return first;
32968 return false;
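
/* Illustrative usage (ours), mirroring the semantics above:

     if (insn_terminates_group_p (insn, current_group))
       ...   // INSN is the last insn of its dispatch group.
     if (insn_terminates_group_p (insn, previous_group))
       ...   // INSN is the first insn of its dispatch group.  */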
32972 static bool
32973 insn_must_be_first_in_group (rtx_insn *insn)
32975 enum attr_type type;
32977 if (!insn
32978 || NOTE_P (insn)
32979 || DEBUG_INSN_P (insn)
32980 || GET_CODE (PATTERN (insn)) == USE
32981 || GET_CODE (PATTERN (insn)) == CLOBBER)
32982 return false;
32984 switch (rs6000_cpu)
32986 case PROCESSOR_POWER5:
32987 if (is_cracked_insn (insn))
32988 return true;
32989 /* FALLTHRU */
32990 case PROCESSOR_POWER4:
32991 if (is_microcoded_insn (insn))
32992 return true;
32994 if (!rs6000_sched_groups)
32995 return false;
32997 type = get_attr_type (insn);
32999 switch (type)
33001 case TYPE_MFCR:
33002 case TYPE_MFCRF:
33003 case TYPE_MTCR:
33004 case TYPE_DELAYED_CR:
33005 case TYPE_CR_LOGICAL:
33006 case TYPE_MTJMPR:
33007 case TYPE_MFJMPR:
33008 case TYPE_DIV:
33009 case TYPE_LOAD_L:
33010 case TYPE_STORE_C:
33011 case TYPE_ISYNC:
33012 case TYPE_SYNC:
33013 return true;
33014 default:
33015 break;
33017 break;
33018 case PROCESSOR_POWER6:
33019 type = get_attr_type (insn);
33021 switch (type)
33023 case TYPE_EXTS:
33024 case TYPE_CNTLZ:
33025 case TYPE_TRAP:
33026 case TYPE_MUL:
33027 case TYPE_INSERT:
33028 case TYPE_FPCOMPARE:
33029 case TYPE_MFCR:
33030 case TYPE_MTCR:
33031 case TYPE_MFJMPR:
33032 case TYPE_MTJMPR:
33033 case TYPE_ISYNC:
33034 case TYPE_SYNC:
33035 case TYPE_LOAD_L:
33036 case TYPE_STORE_C:
33037 return true;
33038 case TYPE_SHIFT:
33039 if (get_attr_dot (insn) == DOT_NO
33040 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33041 return true;
33042 else
33043 break;
33044 case TYPE_DIV:
33045 if (get_attr_size (insn) == SIZE_32)
33046 return true;
33047 else
33048 break;
33049 case TYPE_LOAD:
33050 case TYPE_STORE:
33051 case TYPE_FPLOAD:
33052 case TYPE_FPSTORE:
33053 if (get_attr_update (insn) == UPDATE_YES)
33054 return true;
33055 else
33056 break;
33057 default:
33058 break;
33060 break;
33061 case PROCESSOR_POWER7:
33062 type = get_attr_type (insn);
33064 switch (type)
33066 case TYPE_CR_LOGICAL:
33067 case TYPE_MFCR:
33068 case TYPE_MFCRF:
33069 case TYPE_MTCR:
33070 case TYPE_DIV:
33071 case TYPE_ISYNC:
33072 case TYPE_LOAD_L:
33073 case TYPE_STORE_C:
33074 case TYPE_MFJMPR:
33075 case TYPE_MTJMPR:
33076 return true;
33077 case TYPE_MUL:
33078 case TYPE_SHIFT:
33079 case TYPE_EXTS:
33080 if (get_attr_dot (insn) == DOT_YES)
33081 return true;
33082 else
33083 break;
33084 case TYPE_LOAD:
33085 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33086 || get_attr_update (insn) == UPDATE_YES)
33087 return true;
33088 else
33089 break;
33090 case TYPE_STORE:
33091 case TYPE_FPLOAD:
33092 case TYPE_FPSTORE:
33093 if (get_attr_update (insn) == UPDATE_YES)
33094 return true;
33095 else
33096 break;
33097 default:
33098 break;
33100 break;
33101 case PROCESSOR_POWER8:
33102 type = get_attr_type (insn);
33104 switch (type)
33106 case TYPE_CR_LOGICAL:
33107 case TYPE_DELAYED_CR:
33108 case TYPE_MFCR:
33109 case TYPE_MFCRF:
33110 case TYPE_MTCR:
33111 case TYPE_SYNC:
33112 case TYPE_ISYNC:
33113 case TYPE_LOAD_L:
33114 case TYPE_STORE_C:
33115 case TYPE_VECSTORE:
33116 case TYPE_MFJMPR:
33117 case TYPE_MTJMPR:
33118 return true;
33119 case TYPE_SHIFT:
33120 case TYPE_EXTS:
33121 case TYPE_MUL:
33122 if (get_attr_dot (insn) == DOT_YES)
33123 return true;
33124 else
33125 break;
33126 case TYPE_LOAD:
33127 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33128 || get_attr_update (insn) == UPDATE_YES)
33129 return true;
33130 else
33131 break;
33132 case TYPE_STORE:
33133 if (get_attr_update (insn) == UPDATE_YES
33134 && get_attr_indexed (insn) == INDEXED_YES)
33135 return true;
33136 else
33137 break;
33138 default:
33139 break;
33141 break;
33142 default:
33143 break;
33146 return false;
33149 static bool
33150 insn_must_be_last_in_group (rtx_insn *insn)
33152 enum attr_type type;
33154 if (!insn
33155 || NOTE_P (insn)
33156 || DEBUG_INSN_P (insn)
33157 || GET_CODE (PATTERN (insn)) == USE
33158 || GET_CODE (PATTERN (insn)) == CLOBBER)
33159 return false;
33161 switch (rs6000_cpu) {
33162 case PROCESSOR_POWER4:
33163 case PROCESSOR_POWER5:
33164 if (is_microcoded_insn (insn))
33165 return true;
33167 if (is_branch_slot_insn (insn))
33168 return true;
33170 break;
33171 case PROCESSOR_POWER6:
33172 type = get_attr_type (insn);
33174 switch (type)
33176 case TYPE_EXTS:
33177 case TYPE_CNTLZ:
33178 case TYPE_TRAP:
33179 case TYPE_MUL:
33180 case TYPE_FPCOMPARE:
33181 case TYPE_MFCR:
33182 case TYPE_MTCR:
33183 case TYPE_MFJMPR:
33184 case TYPE_MTJMPR:
33185 case TYPE_ISYNC:
33186 case TYPE_SYNC:
33187 case TYPE_LOAD_L:
33188 case TYPE_STORE_C:
33189 return true;
33190 case TYPE_SHIFT:
33191 if (get_attr_dot (insn) == DOT_NO
33192 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
33193 return true;
33194 else
33195 break;
33196 case TYPE_DIV:
33197 if (get_attr_size (insn) == SIZE_32)
33198 return true;
33199 else
33200 break;
33201 default:
33202 break;
33204 break;
33205 case PROCESSOR_POWER7:
33206 type = get_attr_type (insn);
33208 switch (type)
33210 case TYPE_ISYNC:
33211 case TYPE_SYNC:
33212 case TYPE_LOAD_L:
33213 case TYPE_STORE_C:
33214 return true;
33215 case TYPE_LOAD:
33216 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33217 && get_attr_update (insn) == UPDATE_YES)
33218 return true;
33219 else
33220 break;
33221 case TYPE_STORE:
33222 if (get_attr_update (insn) == UPDATE_YES
33223 && get_attr_indexed (insn) == INDEXED_YES)
33224 return true;
33225 else
33226 break;
33227 default:
33228 break;
33230 break;
33231 case PROCESSOR_POWER8:
33232 type = get_attr_type (insn);
33234 switch (type)
33236 case TYPE_MFCR:
33237 case TYPE_MTCR:
33238 case TYPE_ISYNC:
33239 case TYPE_SYNC:
33240 case TYPE_LOAD_L:
33241 case TYPE_STORE_C:
33242 return true;
33243 case TYPE_LOAD:
33244 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33245 && get_attr_update (insn) == UPDATE_YES)
33246 return true;
33247 else
33248 break;
33249 case TYPE_STORE:
33250 if (get_attr_update (insn) == UPDATE_YES
33251 && get_attr_indexed (insn) == INDEXED_YES)
33252 return true;
33253 else
33254 break;
33255 default:
33256 break;
33258 break;
33259 default:
33260 break;
33263 return false;
33266 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
33267 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
33269 static bool
33270 is_costly_group (rtx *group_insns, rtx next_insn)
33272 int i;
33273 int issue_rate = rs6000_issue_rate ();
33275 for (i = 0; i < issue_rate; i++)
33277 sd_iterator_def sd_it;
33278 dep_t dep;
33279 rtx insn = group_insns[i];
33281 if (!insn)
33282 continue;
33284 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
33286 rtx next = DEP_CON (dep);
33288 if (next == next_insn
33289 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
33290 return true;
33294 return false;
33297 /* Utility of the function redefine_groups.
33298 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
33299 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
33300 to keep it "far" (in a separate group) from GROUP_INSNS, following
33301 one of the following schemes, depending on the value of the flag
33302 -minsert-sched-nops=X:
33303 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
33304 in order to force NEXT_INSN into a separate group.
33305 (2) X < sched_finish_regroup_exact: insert exactly X nops.
33306 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
33307 insertion (has a group just ended, how many vacant issue slots remain in the
33308 last group, and how many dispatch groups were encountered so far). */
33310 static int
33311 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
33312 rtx_insn *next_insn, bool *group_end, int can_issue_more,
33313 int *group_count)
33315 rtx nop;
33316 bool force;
33317 int issue_rate = rs6000_issue_rate ();
33318 bool end = *group_end;
33319 int i;
33321 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
33322 return can_issue_more;
33324 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
33325 return can_issue_more;
33327 force = is_costly_group (group_insns, next_insn);
33328 if (!force)
33329 return can_issue_more;
33331 if (sched_verbose > 6)
33332 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
33333 *group_count ,can_issue_more);
33335 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
33337 if (*group_end)
33338 can_issue_more = 0;
33340 /* Since only a branch can be issued in the last issue_slot, it is
33341 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
33342 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
33343 in this case the last nop will start a new group and the branch
33344 will be forced to the new group. */
33345 if (can_issue_more && !is_branch_slot_insn (next_insn))
33346 can_issue_more--;
33348 /* Do we have a special group ending nop? */
33349 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
33350 || rs6000_cpu_attr == CPU_POWER8)
33352 nop = gen_group_ending_nop ();
33353 emit_insn_before (nop, next_insn);
33354 can_issue_more = 0;
33356 else
33357 while (can_issue_more > 0)
33359 nop = gen_nop ();
33360 emit_insn_before (nop, next_insn);
33361 can_issue_more--;
33364 *group_end = true;
33365 return 0;
33368 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
33370 int n_nops = rs6000_sched_insert_nops;
33372 /* Nops can't be issued from the branch slot, so the effective
33373 issue_rate for nops is 'issue_rate - 1'. */
33374 if (can_issue_more == 0)
33375 can_issue_more = issue_rate;
33376 can_issue_more--;
33377 if (can_issue_more == 0)
33379 can_issue_more = issue_rate - 1;
33380 (*group_count)++;
33381 end = true;
33382 for (i = 0; i < issue_rate; i++)
33384 group_insns[i] = 0;
33388 while (n_nops > 0)
33390 nop = gen_nop ();
33391 emit_insn_before (nop, next_insn);
33392 if (can_issue_more == issue_rate - 1) /* new group begins */
33393 end = false;
33394 can_issue_more--;
33395 if (can_issue_more == 0)
33397 can_issue_more = issue_rate - 1;
33398 (*group_count)++;
33399 end = true;
33400 for (i = 0; i < issue_rate; i++)
33402 group_insns[i] = 0;
33405 n_nops--;
33408 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
33409 can_issue_more++;
33411 /* Is next_insn going to start a new group? */
33412 *group_end
33413 = (end
33414 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33415 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33416 || (can_issue_more < issue_rate
33417 && insn_terminates_group_p (next_insn, previous_group)));
33418 if (*group_end && end)
33419 (*group_count)--;
33421 if (sched_verbose > 6)
33422 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
33423 *group_count, can_issue_more);
33424 return can_issue_more;
33427 return can_issue_more;
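
/* A back-of-the-envelope sketch (ours) of the nop count used by the
   sched_finish_regroup_exact path above; the helper name is invented.  */

static int
example_exact_nop_count (int can_issue_more, int next_is_branch)
{
  /* Only a branch may occupy the last issue slot, so a non-branch is
     already forced into a new group once one slot remains.  */
  return next_is_branch ? can_issue_more : can_issue_more - 1;
}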
33430 /* This function tries to synch the dispatch groups that the compiler "sees"
33431 with the dispatch groups that the processor dispatcher is expected to
33432 form in practice. It tries to achieve this synchronization by forcing the
33433 estimated processor grouping on the compiler (as opposed to the function
33434 'pad_groups' which tries to force the scheduler's grouping on the processor).
33436 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
33437 examines the (estimated) dispatch groups that will be formed by the processor
33438 dispatcher. It marks these group boundaries to reflect the estimated
33439 processor grouping, overriding the grouping that the scheduler had marked.
33440 Depending on the value of the flag '-minsert-sched-nops' this function can
33441 force certain insns into separate groups or force a certain distance between
33442 them by inserting nops, for example, if there exists a "costly dependence"
33443 between the insns.
33445 The function estimates the group boundaries that the processor will form as
33446 follows: It keeps track of how many vacant issue slots are available after
33447 each insn. A subsequent insn will start a new group if one of the following
33448 4 cases applies:
33449 - no more vacant issue slots remain in the current dispatch group.
33450 - only the last issue slot, which is the branch slot, is vacant, but the next
33451 insn is not a branch.
33452 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
33453 which means that a cracked insn (which occupies two issue slots) can't be
33454 issued in this group.
33455 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
33456 start a new group. */
33458 static int
33459 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33460 rtx_insn *tail)
33462 rtx_insn *insn, *next_insn;
33463 int issue_rate;
33464 int can_issue_more;
33465 int slot, i;
33466 bool group_end;
33467 int group_count = 0;
33468 rtx *group_insns;
33470 /* Initialize. */
33471 issue_rate = rs6000_issue_rate ();
33472 group_insns = XALLOCAVEC (rtx, issue_rate);
33473 for (i = 0; i < issue_rate; i++)
33475 group_insns[i] = 0;
33477 can_issue_more = issue_rate;
33478 slot = 0;
33479 insn = get_next_active_insn (prev_head_insn, tail);
33480 group_end = false;
33482 while (insn != NULL_RTX)
33484 slot = (issue_rate - can_issue_more);
33485 group_insns[slot] = insn;
33486 can_issue_more =
33487 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33488 if (insn_terminates_group_p (insn, current_group))
33489 can_issue_more = 0;
33491 next_insn = get_next_active_insn (insn, tail);
33492 if (next_insn == NULL_RTX)
33493 return group_count + 1;
33495 /* Is next_insn going to start a new group? */
33496 group_end
33497 = (can_issue_more == 0
33498 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
33499 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
33500 || (can_issue_more < issue_rate
33501 && insn_terminates_group_p (next_insn, previous_group)));
33503 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
33504 next_insn, &group_end, can_issue_more,
33505 &group_count);
33507 if (group_end)
33509 group_count++;
33510 can_issue_more = 0;
33511 for (i = 0; i < issue_rate; i++)
33513 group_insns[i] = 0;
33517 if (GET_MODE (next_insn) == TImode && can_issue_more)
33518 PUT_MODE (next_insn, VOIDmode);
33519 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
33520 PUT_MODE (next_insn, TImode);
33522 insn = next_insn;
33523 if (can_issue_more == 0)
33524 can_issue_more = issue_rate;
33525 } /* while */
33527 return group_count;
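
/* The TImode/VOIDmode marking above is how the estimated boundaries are
   recorded: an insn left in TImode begins a new dispatch group.  A sketch
   of the result (ours, with an example issue_rate of 4):

     insn1(TImode) insn2 insn3 insn4 | insn5(TImode) insn6 ...
                                       ^ new group starts here  */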
33530 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
33531 dispatch group boundaries that the scheduler had marked. Pad with nops
33532 any dispatch groups which have vacant issue slots, in order to force the
33533 scheduler's grouping on the processor dispatcher. The function
33534 returns the number of dispatch groups found. */
33536 static int
33537 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
33538 rtx_insn *tail)
33540 rtx_insn *insn, *next_insn;
33541 rtx nop;
33542 int issue_rate;
33543 int can_issue_more;
33544 int group_end;
33545 int group_count = 0;
33547 /* Initialize issue_rate. */
33548 issue_rate = rs6000_issue_rate ();
33549 can_issue_more = issue_rate;
33551 insn = get_next_active_insn (prev_head_insn, tail);
33552 next_insn = get_next_active_insn (insn, tail);
33554 while (insn != NULL_RTX)
33556 can_issue_more =
33557 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
33559 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
33561 if (next_insn == NULL_RTX)
33562 break;
33564 if (group_end)
33566 /* If the scheduler had marked group termination at this location
33567 (between insn and next_insn), and neither insn nor next_insn will
33568 force group termination, pad the group with nops to force group
33569 termination. */
33570 if (can_issue_more
33571 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
33572 && !insn_terminates_group_p (insn, current_group)
33573 && !insn_terminates_group_p (next_insn, previous_group))
33575 if (!is_branch_slot_insn (next_insn))
33576 can_issue_more--;
33578 while (can_issue_more)
33580 nop = gen_nop ();
33581 emit_insn_before (nop, next_insn);
33582 can_issue_more--;
33586 can_issue_more = issue_rate;
33587 group_count++;
33590 insn = next_insn;
33591 next_insn = get_next_active_insn (insn, tail);
33594 return group_count;
33597 /* We're beginning a new block. Initialize data structures as necessary. */
33599 static void
33600 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
33601 int sched_verbose ATTRIBUTE_UNUSED,
33602 int max_ready ATTRIBUTE_UNUSED)
33604 last_scheduled_insn = NULL;
33605 load_store_pendulum = 0;
33606 divide_cnt = 0;
33607 vec_load_pendulum = 0;
33610 /* The following function is called at the end of scheduling BB.
33611 After reload, it inserts nops at insn group boundaries. */
33613 static void
33614 rs6000_sched_finish (FILE *dump, int sched_verbose)
33616 int n_groups;
33618 if (sched_verbose)
33619 fprintf (dump, "=== Finishing schedule.\n");
33621 if (reload_completed && rs6000_sched_groups)
33623 /* Do not run sched_finish hook when selective scheduling enabled. */
33624 if (sel_sched_p ())
33625 return;
33627 if (rs6000_sched_insert_nops == sched_finish_none)
33628 return;
33630 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
33631 n_groups = pad_groups (dump, sched_verbose,
33632 current_sched_info->prev_head,
33633 current_sched_info->next_tail);
33634 else
33635 n_groups = redefine_groups (dump, sched_verbose,
33636 current_sched_info->prev_head,
33637 current_sched_info->next_tail);
33639 if (sched_verbose >= 6)
33641 fprintf (dump, "ngroups = %d\n", n_groups);
33642 print_rtl (dump, current_sched_info->prev_head);
33643 fprintf (dump, "Done finish_sched\n");
33648 struct rs6000_sched_context
33650 short cached_can_issue_more;
33651 rtx_insn *last_scheduled_insn;
33652 int load_store_pendulum;
33653 int divide_cnt;
33654 int vec_load_pendulum;
33657 typedef struct rs6000_sched_context rs6000_sched_context_def;
33658 typedef rs6000_sched_context_def *rs6000_sched_context_t;
33660 /* Allocate storage for a new scheduling context. */
33661 static void *
33662 rs6000_alloc_sched_context (void)
33664 return xmalloc (sizeof (rs6000_sched_context_def));
33667 /* If CLEAN_P is true, initialize _SC with clean data;
33668 otherwise initialize it from the global context. */
33669 static void
33670 rs6000_init_sched_context (void *_sc, bool clean_p)
33672 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33674 if (clean_p)
33676 sc->cached_can_issue_more = 0;
33677 sc->last_scheduled_insn = NULL;
33678 sc->load_store_pendulum = 0;
33679 sc->divide_cnt = 0;
33680 sc->vec_load_pendulum = 0;
33682 else
33684 sc->cached_can_issue_more = cached_can_issue_more;
33685 sc->last_scheduled_insn = last_scheduled_insn;
33686 sc->load_store_pendulum = load_store_pendulum;
33687 sc->divide_cnt = divide_cnt;
33688 sc->vec_load_pendulum = vec_load_pendulum;
33692 /* Sets the global scheduling context to the one pointed to by _SC. */
33693 static void
33694 rs6000_set_sched_context (void *_sc)
33696 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33698 gcc_assert (sc != NULL);
33700 cached_can_issue_more = sc->cached_can_issue_more;
33701 last_scheduled_insn = sc->last_scheduled_insn;
33702 load_store_pendulum = sc->load_store_pendulum;
33703 divide_cnt = sc->divide_cnt;
33704 vec_load_pendulum = sc->vec_load_pendulum;
33707 /* Free _SC. */
33708 static void
33709 rs6000_free_sched_context (void *_sc)
33711 gcc_assert (_sc != NULL);
33713 free (_sc);
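
/* Illustrative only (ours): the scheduler driver, not this file, cycles
   the context hooks roughly as follows:

     void *ctx = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (ctx, true);   // start from a clean state
     rs6000_set_sched_context (ctx);          // install as the global state
     ...scheduling happens...
     rs6000_free_sched_context (ctx);  */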
33717 /* Length in units of the trampoline for entering a nested function. */
33720 rs6000_trampoline_size (void)
33722 int ret = 0;
33724 switch (DEFAULT_ABI)
33726 default:
33727 gcc_unreachable ();
33729 case ABI_AIX:
33730 ret = (TARGET_32BIT) ? 12 : 24;
33731 break;
33733 case ABI_ELFv2:
33734 gcc_assert (!TARGET_32BIT);
33735 ret = 32;
33736 break;
33738 case ABI_DARWIN:
33739 case ABI_V4:
33740 ret = (TARGET_32BIT) ? 40 : 48;
33741 break;
33744 return ret;
33747 /* Emit RTL insns to initialize the variable parts of a trampoline.
33748 FNADDR is an RTX for the address of the function's pure code.
33749 CXT is an RTX for the static chain value for the function. */
33751 static void
33752 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
33754 int regsize = (TARGET_32BIT) ? 4 : 8;
33755 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
33756 rtx ctx_reg = force_reg (Pmode, cxt);
33757 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
33759 switch (DEFAULT_ABI)
33761 default:
33762 gcc_unreachable ();
33764 /* Under AIX, just build the 3-word function descriptor. */
33765 case ABI_AIX:
33767 rtx fnmem, fn_reg, toc_reg;
33769 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33770 error ("You cannot take the address of a nested function if you use "
33771 "the -mno-pointers-to-nested-functions option.");
33773 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
33774 fn_reg = gen_reg_rtx (Pmode);
33775 toc_reg = gen_reg_rtx (Pmode);
33777 /* Macro to shorten the code expansions below. */
33778 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
33780 m_tramp = replace_equiv_address (m_tramp, addr);
33782 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
33783 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
33784 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
33785 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
33786 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
33788 # undef MEM_PLUS
33790 break;
33792 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
33793 case ABI_ELFv2:
33794 case ABI_DARWIN:
33795 case ABI_V4:
33796 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
33797 LCT_NORMAL, VOIDmode, 4,
33798 addr, Pmode,
33799 GEN_INT (rs6000_trampoline_size ()), SImode,
33800 fnaddr, Pmode,
33801 ctx_reg, Pmode);
33802 break;
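
/* For reference, a sketch (ours) of the 3-word descriptor that the
   ABI_AIX case above fills in; GCC does not declare such a struct here.  */

struct example_aix_func_desc
{
  void *entry;          /* Code address, copied from the source descriptor.  */
  void *toc;            /* TOC pointer, copied at offset 'regsize'.  */
  void *static_chain;   /* CXT, stored at offset 2*regsize.  */
};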
33807 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
33808 identifier as an argument, so the front end shouldn't look it up. */
33810 static bool
33811 rs6000_attribute_takes_identifier_p (const_tree attr_id)
33813 return is_attribute_p ("altivec", attr_id);
33816 /* Handle the "altivec" attribute. The attribute may have
33817 arguments as follows:
33819 __attribute__((altivec(vector__)))
33820 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
33821 __attribute__((altivec(bool__))) (always followed by 'unsigned')
33823 and may appear more than once (e.g., 'vector bool char') in a
33824 given declaration. */
33826 static tree
33827 rs6000_handle_altivec_attribute (tree *node,
33828 tree name ATTRIBUTE_UNUSED,
33829 tree args,
33830 int flags ATTRIBUTE_UNUSED,
33831 bool *no_add_attrs)
33833 tree type = *node, result = NULL_TREE;
33834 machine_mode mode;
33835 int unsigned_p;
33836 char altivec_type
33837 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
33838 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
33839 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
33840 : '?');
33842 while (POINTER_TYPE_P (type)
33843 || TREE_CODE (type) == FUNCTION_TYPE
33844 || TREE_CODE (type) == METHOD_TYPE
33845 || TREE_CODE (type) == ARRAY_TYPE)
33846 type = TREE_TYPE (type);
33848 mode = TYPE_MODE (type);
33850 /* Check for invalid AltiVec type qualifiers. */
33851 if (type == long_double_type_node)
33852 error ("use of %<long double%> in AltiVec types is invalid");
33853 else if (type == boolean_type_node)
33854 error ("use of boolean types in AltiVec types is invalid");
33855 else if (TREE_CODE (type) == COMPLEX_TYPE)
33856 error ("use of %<complex%> in AltiVec types is invalid");
33857 else if (DECIMAL_FLOAT_MODE_P (mode))
33858 error ("use of decimal floating point types in AltiVec types is invalid");
33859 else if (!TARGET_VSX)
33861 if (type == long_unsigned_type_node || type == long_integer_type_node)
33863 if (TARGET_64BIT)
33864 error ("use of %<long%> in AltiVec types is invalid for "
33865 "64-bit code without -mvsx");
33866 else if (rs6000_warn_altivec_long)
33867 warning (0, "use of %<long%> in AltiVec types is deprecated; "
33868 "use %<int%>");
33870 else if (type == long_long_unsigned_type_node
33871 || type == long_long_integer_type_node)
33872 error ("use of %<long long%> in AltiVec types is invalid without "
33873 "-mvsx");
33874 else if (type == double_type_node)
33875 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
33878 switch (altivec_type)
33880 case 'v':
33881 unsigned_p = TYPE_UNSIGNED (type);
33882 switch (mode)
33884 case TImode:
33885 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
33886 break;
33887 case DImode:
33888 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
33889 break;
33890 case SImode:
33891 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
33892 break;
33893 case HImode:
33894 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
33895 break;
33896 case QImode:
33897 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
33898 break;
33899 case SFmode: result = V4SF_type_node; break;
33900 case DFmode: result = V2DF_type_node; break;
33901 /* If the user says 'vector int bool', we may be handed the 'bool'
33902 attribute _before_ the 'vector' attribute, and so select the
33903 proper type in the 'b' case below. */
33904 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
33905 case V2DImode: case V2DFmode:
33906 result = type;
33907 default: break;
33909 break;
33910 case 'b':
33911 switch (mode)
33913 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
33914 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
33915 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
33916 case QImode: case V16QImode: result = bool_V16QI_type_node;
33917 default: break;
33919 break;
33920 case 'p':
33921 switch (mode)
33923 case V8HImode: result = pixel_V8HI_type_node;
33924 default: break;
33926 default: break;
33929 /* Propagate qualifiers attached to the element type
33930 onto the vector type. */
33931 if (result && result != type && TYPE_QUALS (type))
33932 result = build_qualified_type (result, TYPE_QUALS (type));
33934 *no_add_attrs = true; /* No need to hang on to the attribute. */
33936 if (result)
33937 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
33939 return NULL_TREE;
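
/* Example usage (ours): this attribute is the internal form behind the
   AltiVec 'vector' keyword and can be spelled out directly, e.g.

     typedef int example_v4si __attribute__ ((altivec (vector__)));

   'vector bool int' and 'vector __pixel' stack the bool__/pixel__ forms
   on top of the vector type, which is why the 'b' and 'p' cases above
   also accept vector modes.  */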
33942 /* AltiVec defines four built-in scalar types that serve as vector
33943 elements; we must teach the compiler how to mangle them. */
33945 static const char *
33946 rs6000_mangle_type (const_tree type)
33948 type = TYPE_MAIN_VARIANT (type);
33950 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33951 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33952 return NULL;
33954 if (type == bool_char_type_node) return "U6__boolc";
33955 if (type == bool_short_type_node) return "U6__bools";
33956 if (type == pixel_type_node) return "u7__pixel";
33957 if (type == bool_int_type_node) return "U6__booli";
33958 if (type == bool_long_type_node) return "U6__booll";
33960 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
33961 "g" for IBM extended double, no matter whether it is long double (using
33962 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
33963 if (TARGET_FLOAT128_TYPE)
33965 if (type == ieee128_float_type_node)
33966 return "U10__float128";
33968 if (type == ibm128_float_type_node)
33969 return "g";
33971 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
33972 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
33975 /* Mangle IBM extended float long double as `g' (__float128) on
33976 powerpc*-linux where long-double-64 previously was the default. */
33977 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
33978 && TARGET_ELF
33979 && TARGET_LONG_DOUBLE_128
33980 && !TARGET_IEEEQUAD)
33981 return "g";
33983 /* For all other types, use normal C++ mangling. */
33984 return NULL;
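
/* Worked example (ours): these strings splice straight into Itanium C++
   mangling, so 'void f (__pixel);' mangles to something like
   '_Z1fu7__pixel'.  */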
33987 /* Handle a "longcall" or "shortcall" attribute; arguments as in
33988 struct attribute_spec.handler. */
33990 static tree
33991 rs6000_handle_longcall_attribute (tree *node, tree name,
33992 tree args ATTRIBUTE_UNUSED,
33993 int flags ATTRIBUTE_UNUSED,
33994 bool *no_add_attrs)
33996 if (TREE_CODE (*node) != FUNCTION_TYPE
33997 && TREE_CODE (*node) != FIELD_DECL
33998 && TREE_CODE (*node) != TYPE_DECL)
34000 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34001 name);
34002 *no_add_attrs = true;
34005 return NULL_TREE;
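
/* Example usage (ours):

     void far_away (void) __attribute__ ((longcall));

   Calls to 'far_away' are then made through a pointer, so the callee may
   live outside the direct branch range.  */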
34008 /* Set longcall attributes on all functions declared when
34009 rs6000_default_long_calls is true. */
34010 static void
34011 rs6000_set_default_type_attributes (tree type)
34013 if (rs6000_default_long_calls
34014 && (TREE_CODE (type) == FUNCTION_TYPE
34015 || TREE_CODE (type) == METHOD_TYPE))
34016 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
34017 NULL_TREE,
34018 TYPE_ATTRIBUTES (type));
34020 #if TARGET_MACHO
34021 darwin_set_default_type_attributes (type);
34022 #endif
34025 /* Return a reference suitable for calling a function with the
34026 longcall attribute. */
34029 rs6000_longcall_ref (rtx call_ref)
34031 const char *call_name;
34032 tree node;
34034 if (GET_CODE (call_ref) != SYMBOL_REF)
34035 return call_ref;
34037 /* System V adds '.' to the internal name, so skip any leading periods. */
34038 call_name = XSTR (call_ref, 0);
34039 if (*call_name == '.')
34041 while (*call_name == '.')
34042 call_name++;
34044 node = get_identifier (call_name);
34045 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
34048 return force_reg (Pmode, call_ref);
34051 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
34052 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
34053 #endif
34055 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34056 struct attribute_spec.handler. */
34057 static tree
34058 rs6000_handle_struct_attribute (tree *node, tree name,
34059 tree args ATTRIBUTE_UNUSED,
34060 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34062 tree *type = NULL;
34063 if (DECL_P (*node))
34065 if (TREE_CODE (*node) == TYPE_DECL)
34066 type = &TREE_TYPE (*node);
34068 else
34069 type = node;
34071 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
34072 || TREE_CODE (*type) == UNION_TYPE)))
34074 warning (OPT_Wattributes, "%qE attribute ignored", name);
34075 *no_add_attrs = true;
34078 else if ((is_attribute_p ("ms_struct", name)
34079 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
34080 || ((is_attribute_p ("gcc_struct", name)
34081 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
34083 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
34084 name);
34085 *no_add_attrs = true;
34088 return NULL_TREE;
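
/* Example usage (ours):

     struct example_s { char c; int i; } __attribute__ ((ms_struct));

   requests Microsoft-compatible record layout, honored when
   rs6000_ms_bitfield_layout_p below says so.  */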
34091 static bool
34092 rs6000_ms_bitfield_layout_p (const_tree record_type)
34094 return (TARGET_USE_MS_BITFIELD_LAYOUT
34095 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
34096 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
34099 #ifdef USING_ELFOS_H
34101 /* A get_unnamed_section callback, used for switching to toc_section. */
34103 static void
34104 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34106 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34107 && TARGET_MINIMAL_TOC)
34109 if (!toc_initialized)
34111 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34112 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34113 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
34114 fprintf (asm_out_file, "\t.tc ");
34115 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
34116 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34117 fprintf (asm_out_file, "\n");
34119 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34120 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34121 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34122 fprintf (asm_out_file, " = .+32768\n");
34123 toc_initialized = 1;
34125 else
34126 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34128 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34130 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
34131 if (!toc_initialized)
34133 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34134 toc_initialized = 1;
34137 else
34139 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34140 if (!toc_initialized)
34142 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
34143 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
34144 fprintf (asm_out_file, " = .+32768\n");
34145 toc_initialized = 1;
34150 /* Implement TARGET_ASM_INIT_SECTIONS. */
34152 static void
34153 rs6000_elf_asm_init_sections (void)
34155 toc_section
34156 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
34158 sdata2_section
34159 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
34160 SDATA2_SECTION_ASM_OP);
34163 /* Implement TARGET_SELECT_RTX_SECTION. */
34165 static section *
34166 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
34167 unsigned HOST_WIDE_INT align)
34169 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34170 return toc_section;
34171 else
34172 return default_elf_select_rtx_section (mode, x, align);
34175 /* For a SYMBOL_REF, set generic flags and then perform some
34176 target-specific processing.
34178 When the AIX ABI is requested on a non-AIX system, replace the
34179 function name with the real name (with a leading .) rather than the
34180 function descriptor name. This saves a lot of code that would
34181 otherwise have to be overridden to handle the prefixes.
34183 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
34184 static void
34185 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
34187 default_encode_section_info (decl, rtl, first);
34189 if (first
34190 && TREE_CODE (decl) == FUNCTION_DECL
34191 && !TARGET_AIX
34192 && DEFAULT_ABI == ABI_AIX)
34194 rtx sym_ref = XEXP (rtl, 0);
34195 size_t len = strlen (XSTR (sym_ref, 0));
34196 char *str = XALLOCAVEC (char, len + 2);
34197 str[0] = '.';
34198 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
34199 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
34203 static inline bool
34204 compare_section_name (const char *section, const char *templ)
34206 int len;
34208 len = strlen (templ);
34209 return (strncmp (section, templ, len) == 0
34210 && (section[len] == 0 || section[len] == '.'));
34213 bool
34214 rs6000_elf_in_small_data_p (const_tree decl)
34216 if (rs6000_sdata == SDATA_NONE)
34217 return false;
34219 /* We want to merge strings, so we never consider them small data. */
34220 if (TREE_CODE (decl) == STRING_CST)
34221 return false;
34223 /* Functions are never in the small data area. */
34224 if (TREE_CODE (decl) == FUNCTION_DECL)
34225 return false;
34227 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
34229 const char *section = DECL_SECTION_NAME (decl);
34230 if (compare_section_name (section, ".sdata")
34231 || compare_section_name (section, ".sdata2")
34232 || compare_section_name (section, ".gnu.linkonce.s")
34233 || compare_section_name (section, ".sbss")
34234 || compare_section_name (section, ".sbss2")
34235 || compare_section_name (section, ".gnu.linkonce.sb")
34236 || strcmp (section, ".PPC.EMB.sdata0") == 0
34237 || strcmp (section, ".PPC.EMB.sbss0") == 0)
34238 return true;
34240 else
34242 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
34244 if (size > 0
34245 && size <= g_switch_value
34246 /* If it's not public, and we're not going to reference it there,
34247 there's no need to put it in the small data section. */
34248 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
34249 return true;
34252 return false;
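
/* Illustrative example (ours): with small data enabled and the default
   -G threshold, a global such as

     int example_counter;   // 4 bytes, not larger than g_switch_value

   passes the size test above and is placed in the small data area.  */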
34255 #endif /* USING_ELFOS_H */
34257 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
34259 static bool
34260 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
34262 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
34265 /* Do not place thread-local symbols refs in the object blocks. */
34267 static bool
34268 rs6000_use_blocks_for_decl_p (const_tree decl)
34270 return !DECL_THREAD_LOCAL_P (decl);
34273 /* Return a REG that occurs in ADDR with coefficient 1.
34274 ADDR can be effectively incremented by incrementing REG.
34276 r0 is special and we must not select it as an address
34277 register by this routine since our caller will try to
34278 increment the returned register via an "la" instruction. */
34281 find_addr_reg (rtx addr)
34283 while (GET_CODE (addr) == PLUS)
34285 if (GET_CODE (XEXP (addr, 0)) == REG
34286 && REGNO (XEXP (addr, 0)) != 0)
34287 addr = XEXP (addr, 0);
34288 else if (GET_CODE (XEXP (addr, 1)) == REG
34289 && REGNO (XEXP (addr, 1)) != 0)
34290 addr = XEXP (addr, 1);
34291 else if (CONSTANT_P (XEXP (addr, 0)))
34292 addr = XEXP (addr, 1);
34293 else if (CONSTANT_P (XEXP (addr, 1)))
34294 addr = XEXP (addr, 0);
34295 else
34296 gcc_unreachable ();
34298 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
34299 return addr;
34302 void
34303 rs6000_fatal_bad_address (rtx op)
34305 fatal_insn ("bad address", op);
34308 #if TARGET_MACHO
34310 typedef struct branch_island_d {
34311 tree function_name;
34312 tree label_name;
34313 int line_number;
34314 } branch_island;
34317 static vec<branch_island, va_gc> *branch_islands;
34319 /* Remember to generate a branch island for far calls to the given
34320 function. */
34322 static void
34323 add_compiler_branch_island (tree label_name, tree function_name,
34324 int line_number)
34326 branch_island bi = {function_name, label_name, line_number};
34327 vec_safe_push (branch_islands, bi);
34330 /* Generate far-jump branch islands for everything recorded in
34331 branch_islands. Invoked immediately after the last instruction of
34332 the epilogue has been emitted; the branch islands must be appended
34333 to, and contiguous with, the function body. Mach-O stubs are
34334 generated in machopic_output_stub(). */
34336 static void
34337 macho_branch_islands (void)
34339 char tmp_buf[512];
34341 while (!vec_safe_is_empty (branch_islands))
34343 branch_island *bi = &branch_islands->last ();
34344 const char *label = IDENTIFIER_POINTER (bi->label_name);
34345 const char *name = IDENTIFIER_POINTER (bi->function_name);
34346 char name_buf[512];
34347 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
34348 if (name[0] == '*' || name[0] == '&')
34349 strcpy (name_buf, name+1);
34350 else
34352 name_buf[0] = '_';
34353 strcpy (name_buf+1, name);
34355 strcpy (tmp_buf, "\n");
34356 strcat (tmp_buf, label);
34357 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34358 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34359 dbxout_stabd (N_SLINE, bi->line_number);
34360 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34361 if (flag_pic)
34363 if (TARGET_LINK_STACK)
34365 char name[32];
34366 get_ppc476_thunk_name (name);
34367 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
34368 strcat (tmp_buf, name);
34369 strcat (tmp_buf, "\n");
34370 strcat (tmp_buf, label);
34371 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34373 else
34375 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
34376 strcat (tmp_buf, label);
34377 strcat (tmp_buf, "_pic\n");
34378 strcat (tmp_buf, label);
34379 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
34382 strcat (tmp_buf, "\taddis r11,r11,ha16(");
34383 strcat (tmp_buf, name_buf);
34384 strcat (tmp_buf, " - ");
34385 strcat (tmp_buf, label);
34386 strcat (tmp_buf, "_pic)\n");
34388 strcat (tmp_buf, "\tmtlr r0\n");
34390 strcat (tmp_buf, "\taddi r12,r11,lo16(");
34391 strcat (tmp_buf, name_buf);
34392 strcat (tmp_buf, " - ");
34393 strcat (tmp_buf, label);
34394 strcat (tmp_buf, "_pic)\n");
34396 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
34398 else
34400 strcat (tmp_buf, ":\nlis r12,hi16(");
34401 strcat (tmp_buf, name_buf);
34402 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
34403 strcat (tmp_buf, name_buf);
34404 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
34406 output_asm_insn (tmp_buf, 0);
34407 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
34408 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34409 dbxout_stabd (N_SLINE, bi->line_number);
34410 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
34411 branch_islands->pop ();
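
/* Sketch (ours) of the island the non-PIC path above emits, for a label
   'L42' and function 'foo':

     L42:
         lis   r12,hi16(_foo)
         ori   r12,r12,lo16(_foo)
         mtctr r12
         bctr  */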
34415 /* NO_PREVIOUS_DEF checks the list of branch islands to see whether the
34416 function name is already there. */
34418 static int
34419 no_previous_def (tree function_name)
34421 branch_island *bi;
34422 unsigned ix;
34424 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34425 if (function_name == bi->function_name)
34426 return 0;
34427 return 1;
34430 /* GET_PREV_LABEL gets the label name from the previous definition of
34431 the function. */
34433 static tree
34434 get_prev_label (tree function_name)
34436 branch_island *bi;
34437 unsigned ix;
34439 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
34440 if (function_name == bi->function_name)
34441 return bi->label_name;
34442 return NULL_TREE;
34445 /* INSN is either a function call or a millicode call. It may have an
34446 unconditional jump in its delay slot.
34448 CALL_DEST is the routine we are calling. */
34450 char *
34451 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
34452 int cookie_operand_number)
34454 static char buf[256];
34455 if (darwin_emit_branch_islands
34456 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
34457 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
34459 tree labelname;
34460 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
34462 if (no_previous_def (funname))
34464 rtx label_rtx = gen_label_rtx ();
34465 char *label_buf, temp_buf[256];
34466 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
34467 CODE_LABEL_NUMBER (label_rtx));
34468 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
34469 labelname = get_identifier (label_buf);
34470 add_compiler_branch_island (labelname, funname, insn_line (insn));
34472 else
34473 labelname = get_prev_label (funname);
34475 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
34476 instruction will reach 'foo', otherwise link as 'bl L42'".
34477 "L42" should be a 'branch island', that will do a far jump to
34478 'foo'. Branch islands are generated in
34479 macho_branch_islands(). */
34480 sprintf (buf, "jbsr %%z%d,%.246s",
34481 dest_operand_number, IDENTIFIER_POINTER (labelname));
34483 else
34484 sprintf (buf, "bl %%z%d", dest_operand_number);
34485 return buf;
34488 /* Generate PIC and indirect symbol stubs. */
34490 void
34491 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34493 unsigned int length;
34494 char *symbol_name, *lazy_ptr_name;
34495 char *local_label_0;
34496 static int label = 0;
34498 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34499 symb = (*targetm.strip_name_encoding) (symb);
34502 length = strlen (symb);
34503 symbol_name = XALLOCAVEC (char, length + 32);
34504 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34506 lazy_ptr_name = XALLOCAVEC (char, length + 32);
34507 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
34509 if (flag_pic == 2)
34510 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
34511 else
34512 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
34514 if (flag_pic == 2)
34516 fprintf (file, "\t.align 5\n");
34518 fprintf (file, "%s:\n", stub);
34519 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34521 label++;
34522 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
34523 sprintf (local_label_0, "\"L%011d$spb\"", label);
34525 fprintf (file, "\tmflr r0\n");
34526 if (TARGET_LINK_STACK)
34528 char name[32];
34529 get_ppc476_thunk_name (name);
34530 fprintf (file, "\tbl %s\n", name);
34531 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34533 else
34535 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
34536 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
34538 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
34539 lazy_ptr_name, local_label_0);
34540 fprintf (file, "\tmtlr r0\n");
34541 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
34542 (TARGET_64BIT ? "ldu" : "lwzu"),
34543 lazy_ptr_name, local_label_0);
34544 fprintf (file, "\tmtctr r12\n");
34545 fprintf (file, "\tbctr\n");
34547 else
34549 fprintf (file, "\t.align 4\n");
34551 fprintf (file, "%s:\n", stub);
34552 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34554 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
34555 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
34556 (TARGET_64BIT ? "ldu" : "lwzu"),
34557 lazy_ptr_name);
34558 fprintf (file, "\tmtctr r12\n");
34559 fprintf (file, "\tbctr\n");
34562 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
34563 fprintf (file, "%s:\n", lazy_ptr_name);
34564 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34565 fprintf (file, "%sdyld_stub_binding_helper\n",
34566 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
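
/* Sketch (ours) of the 32-bit non-PIC lazy stub emitted above for a
   symbol 'foo':

     L_foo$stub:
         .indirect_symbol _foo
         lis   r11,ha16(L_foo$lazy_ptr)
         lwzu  r12,lo16(L_foo$lazy_ptr)(r11)
         mtctr r12
         bctr
     L_foo$lazy_ptr:
         .indirect_symbol _foo
         .long dyld_stub_binding_helper

   The first call reaches dyld_stub_binding_helper, which patches the
   real address into the lazy pointer.  */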
34569 /* Legitimize PIC addresses. If the address is already
34570 position-independent, we return ORIG. Newly generated
34571 position-independent addresses go into a reg. This is REG if nonzero;
34572 otherwise we allocate register(s) as necessary. */
34574 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
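
/* The bias above folds the signed 16-bit range test into one unsigned
   compare: 0x7fff + 0x8000 = 0xffff passes, -0x8000 + 0x8000 = 0 passes,
   while 0x8000 + 0x8000 = 0x10000 fails.  */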
34577 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
34578 rtx reg)
34580 rtx base, offset;
34582 if (reg == NULL && ! reload_in_progress && ! reload_completed)
34583 reg = gen_reg_rtx (Pmode);
34585 if (GET_CODE (orig) == CONST)
34587 rtx reg_temp;
34589 if (GET_CODE (XEXP (orig, 0)) == PLUS
34590 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
34591 return orig;
34593 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
34595 /* Use a different reg for the intermediate value, as
34596 it will be marked UNCHANGING. */
34597 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
34598 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
34599 Pmode, reg_temp);
34600 offset =
34601 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
34602 Pmode, reg);
34604 if (GET_CODE (offset) == CONST_INT)
34606 if (SMALL_INT (offset))
34607 return plus_constant (Pmode, base, INTVAL (offset));
34608 else if (! reload_in_progress && ! reload_completed)
34609 offset = force_reg (Pmode, offset);
34610 else
34612 rtx mem = force_const_mem (Pmode, orig);
34613 return machopic_legitimize_pic_address (mem, Pmode, reg);
34616 return gen_rtx_PLUS (Pmode, base, offset);
34619 /* Fall back on generic machopic code. */
34620 return machopic_legitimize_pic_address (orig, mode, reg);
34623 /* Output a .machine directive for the Darwin assembler, and call
34624 the generic start_file routine. */
34626 static void
34627 rs6000_darwin_file_start (void)
34629 static const struct
34631 const char *arg;
34632 const char *name;
34633 HOST_WIDE_INT if_set;
34634 } mapping[] = {
34635 { "ppc64", "ppc64", MASK_64BIT },
34636 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
34637 { "power4", "ppc970", 0 },
34638 { "G5", "ppc970", 0 },
34639 { "7450", "ppc7450", 0 },
34640 { "7400", "ppc7400", MASK_ALTIVEC },
34641 { "G4", "ppc7400", 0 },
34642 { "750", "ppc750", 0 },
34643 { "740", "ppc750", 0 },
34644 { "G3", "ppc750", 0 },
34645 { "604e", "ppc604e", 0 },
34646 { "604", "ppc604", 0 },
34647 { "603e", "ppc603", 0 },
34648 { "603", "ppc603", 0 },
34649 { "601", "ppc601", 0 },
34650 { NULL, "ppc", 0 } };
34651 const char *cpu_id = "";
34652 size_t i;
34654 rs6000_file_start ();
34655 darwin_file_start ();
34657 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
34659 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
34660 cpu_id = rs6000_default_cpu;
34662 if (global_options_set.x_rs6000_cpu_index)
34663 cpu_id = processor_target_table[rs6000_cpu_index].name;
34665 /* Look through the mapping array. Pick the first name that either
34666 matches the argument, has a bit set in IF_SET that is also set
34667 in the target flags, or has a NULL name. */
34669 i = 0;
34670 while (mapping[i].arg != NULL
34671 && strcmp (mapping[i].arg, cpu_id) != 0
34672 && (mapping[i].if_set & rs6000_isa_flags) == 0)
34673 i++;
34675 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
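
/* For example, per the table above, '-mcpu=G5' selects '.machine ppc970',
   and an unrecognized CPU falls through to the final '.machine ppc'
   entry.  */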
34678 #endif /* TARGET_MACHO */
34680 #if TARGET_ELF
34681 static int
34682 rs6000_elf_reloc_rw_mask (void)
34684 if (flag_pic)
34685 return 3;
34686 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34687 return 2;
34688 else
34689 return 0;
34692 /* Record an element in the table of global constructors. SYMBOL is
34693 a SYMBOL_REF of the function to be called; PRIORITY is a number
34694 between 0 and MAX_INIT_PRIORITY.
34696 This differs from default_named_section_asm_out_constructor in
34697 that we have special handling for -mrelocatable. */
34699 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
34700 static void
34701 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
34703 const char *section = ".ctors";
34704 char buf[18];
34706 if (priority != DEFAULT_INIT_PRIORITY)
34708 sprintf (buf, ".ctors.%.5u",
34709 /* Invert the numbering so the linker puts us in the proper
34710 order; constructors are run from right to left, and the
34711 linker sorts in increasing order. */
34712 MAX_INIT_PRIORITY - priority);
34713 section = buf;
34716 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34717 assemble_align (POINTER_SIZE);
34719 if (DEFAULT_ABI == ABI_V4
34720 && (TARGET_RELOCATABLE || flag_pic > 1))
34722 fputs ("\t.long (", asm_out_file);
34723 output_addr_const (asm_out_file, symbol);
34724 fputs (")@fixup\n", asm_out_file);
34726 else
34727 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
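
/* Worked example (ours): with MAX_INIT_PRIORITY == 65535, a priority-101
   constructor lands in section '.ctors.65434'; the inverted number makes
   the linker's increasing sort produce the desired right-to-left run
   order.  */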
34730 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
34731 static void
34732 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
34734 const char *section = ".dtors";
34735 char buf[18];
34737 if (priority != DEFAULT_INIT_PRIORITY)
34739 sprintf (buf, ".dtors.%.5u",
34740 /* Invert the numbering so the linker puts us in the proper
34741 order; constructors are run from right to left, and the
34742 linker sorts in increasing order. */
34743 MAX_INIT_PRIORITY - priority);
34744 section = buf;
34747 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34748 assemble_align (POINTER_SIZE);
34750 if (DEFAULT_ABI == ABI_V4
34751 && (TARGET_RELOCATABLE || flag_pic > 1))
34753 fputs ("\t.long (", asm_out_file);
34754 output_addr_const (asm_out_file, symbol);
34755 fputs (")@fixup\n", asm_out_file);
34757 else
34758 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34761 void
34762 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
34764 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
34766 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
34767 ASM_OUTPUT_LABEL (file, name);
34768 fputs (DOUBLE_INT_ASM_OP, file);
34769 rs6000_output_function_entry (file, name);
34770 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
34771 if (DOT_SYMBOLS)
34773 fputs ("\t.size\t", file);
34774 assemble_name (file, name);
34775 fputs (",24\n\t.type\t.", file);
34776 assemble_name (file, name);
34777 fputs (",@function\n", file);
34778 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
34780 fputs ("\t.globl\t.", file);
34781 assemble_name (file, name);
34782 putc ('\n', file);
34785 else
34786 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34787 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34788 rs6000_output_function_entry (file, name);
34789 fputs (":\n", file);
34790 return;
34793 if (DEFAULT_ABI == ABI_V4
34794 && (TARGET_RELOCATABLE || flag_pic > 1)
34795 && !TARGET_SECURE_PLT
34796 && (get_pool_size () != 0 || crtl->profile)
34797 && uses_TOC ())
34799 char buf[256];
34801 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34803 fprintf (file, "\t.long ");
34804 assemble_name (file, toc_label_name);
34805 need_toc_init = 1;
34806 putc ('-', file);
34807 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34808 assemble_name (file, buf);
34809 putc ('\n', file);
34812 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34813 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34815 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
34817 char buf[256];
34819 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34821 fprintf (file, "\t.quad .TOC.-");
34822 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34823 assemble_name (file, buf);
34824 putc ('\n', file);
34827 if (DEFAULT_ABI == ABI_AIX)
34829 const char *desc_name, *orig_name;
34831 orig_name = (*targetm.strip_name_encoding) (name);
34832 desc_name = orig_name;
34833 while (*desc_name == '.')
34834 desc_name++;
34836 if (TREE_PUBLIC (decl))
34837 fprintf (file, "\t.globl %s\n", desc_name);
34839 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34840 fprintf (file, "%s:\n", desc_name);
34841 fprintf (file, "\t.long %s\n", orig_name);
34842 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
34843 fputs ("\t.long 0\n", file);
34844 fprintf (file, "\t.previous\n");
34846 ASM_OUTPUT_LABEL (file, name);
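/* For illustration, the 64-bit ELFv1 path above emits a function
   descriptor in .opd; with DOT_SYMBOLS the output is roughly (a sketch
   with a hypothetical function "foo", not verified assembler output):

	.section ".opd","aw"
	.align 3
   foo:
	.quad .foo,.TOC.@tocbase,0
	.previous
	.size foo,24
	.type .foo,@function
	.globl .foo

   so "foo" names the 24-byte descriptor while ".foo" is the code entry
   point emitted by rs6000_output_function_entry. */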
34849 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
34850 static void
34851 rs6000_elf_file_end (void)
34853 #ifdef HAVE_AS_GNU_ATTRIBUTE
34854 /* ??? The value emitted depends on options active at file end.
34855 Assume anyone using #pragma or attributes that might change
34856 options knows what they are doing. */
34857 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
34858 && rs6000_passes_float)
34860 int fp;
34862 if (TARGET_DF_FPR | TARGET_DF_SPE)
34863 fp = 1;
34864 else if (TARGET_SF_FPR | TARGET_SF_SPE)
34865 fp = 3;
34866 else
34867 fp = 2;
34868 if (rs6000_passes_long_double)
34870 if (!TARGET_LONG_DOUBLE_128)
34871 fp |= 2 * 4;
34872 else if (TARGET_IEEEQUAD)
34873 fp |= 3 * 4;
34874 else
34875 fp |= 1 * 4;
34877 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
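/* Worked example of the encoding above (attribute 4 is the FP ABI tag,
   Tag_GNU_Power_ABI_FP in binutils terms): hardware double-precision
   float (fp = 1) combined with a 128-bit IBM extended long double adds
   1 * 4, so the directive emitted is "\t.gnu_attribute 4, 5".  The low
   two bits describe the FP calling convention, the next two the long
   double format. */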
34879 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
34881 if (rs6000_passes_vector)
34882 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
34883 (TARGET_ALTIVEC_ABI ? 2
34884 : TARGET_SPE_ABI ? 3
34885 : 1));
34886 if (rs6000_returns_struct)
34887 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
34888 aix_struct_return ? 2 : 1);
34890 #endif
34891 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
34892 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
34893 file_end_indicate_exec_stack ();
34894 #endif
34896 if (flag_split_stack)
34897 file_end_indicate_split_stack ();
34899 if (cpu_builtin_p)
34901 /* We have expanded a CPU builtin, so we need to emit a reference to
34902 the special symbol through which LIBC declares that it supports the
34903 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 fields in the TCB. */
34904 switch_to_section (data_section);
34905 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
34906 fprintf (asm_out_file, "\t%s %s\n",
34907 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
34910 #endif
34912 #if TARGET_XCOFF
34914 #ifndef HAVE_XCOFF_DWARF_EXTRAS
34915 #define HAVE_XCOFF_DWARF_EXTRAS 0
34916 #endif
34918 static enum unwind_info_type
34919 rs6000_xcoff_debug_unwind_info (void)
34921 return UI_NONE;
34924 static void
34925 rs6000_xcoff_asm_output_anchor (rtx symbol)
34927 char buffer[100];
34929 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
34930 SYMBOL_REF_BLOCK_OFFSET (symbol));
34931 fprintf (asm_out_file, "%s", SET_ASM_OP);
34932 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
34933 fprintf (asm_out_file, ",");
34934 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
34935 fprintf (asm_out_file, "\n");
34938 static void
34939 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
34941 fputs (GLOBAL_ASM_OP, stream);
34942 RS6000_OUTPUT_BASENAME (stream, name);
34943 putc ('\n', stream);
34946 /* A get_unnamed_decl callback, used for read-only sections. PTR
34947 points to the section string variable. */
34949 static void
34950 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
34952 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
34953 *(const char *const *) directive,
34954 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34957 /* Likewise for read-write sections. */
34959 static void
34960 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
34962 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
34963 *(const char *const *) directive,
34964 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34967 static void
34968 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
34970 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
34971 *(const char *const *) directive,
34972 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34975 /* A get_unnamed_section callback, used for switching to toc_section. */
34977 static void
34978 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34980 if (TARGET_MINIMAL_TOC)
34982 /* toc_section is always selected at least once from
34983 rs6000_xcoff_file_start, so this is guaranteed to be
34984 defined exactly once in each file. */
34985 if (!toc_initialized)
34987 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
34988 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
34989 toc_initialized = 1;
34991 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
34992 (TARGET_32BIT ? "" : ",3"));
34994 else
34995 fputs ("\t.toc\n", asm_out_file);
34998 /* Implement TARGET_ASM_INIT_SECTIONS. */
35000 static void
35001 rs6000_xcoff_asm_init_sections (void)
35003 read_only_data_section
35004 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35005 &xcoff_read_only_section_name);
35007 private_data_section
35008 = get_unnamed_section (SECTION_WRITE,
35009 rs6000_xcoff_output_readwrite_section_asm_op,
35010 &xcoff_private_data_section_name);
35012 tls_data_section
35013 = get_unnamed_section (SECTION_TLS,
35014 rs6000_xcoff_output_tls_section_asm_op,
35015 &xcoff_tls_data_section_name);
35017 tls_private_data_section
35018 = get_unnamed_section (SECTION_TLS,
35019 rs6000_xcoff_output_tls_section_asm_op,
35020 &xcoff_private_data_section_name);
35022 read_only_private_data_section
35023 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
35024 &xcoff_private_data_section_name);
35026 toc_section
35027 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
35029 readonly_data_section = read_only_data_section;
35032 static int
35033 rs6000_xcoff_reloc_rw_mask (void)
35035 return 3;
35038 static void
35039 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
35040 tree decl ATTRIBUTE_UNUSED)
35042 int smclass;
35043 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
35045 if (flags & SECTION_EXCLUDE)
35046 smclass = 4;
35047 else if (flags & SECTION_DEBUG)
35049 fprintf (asm_out_file, "\t.dwsect %s\n", name);
35050 return;
35052 else if (flags & SECTION_CODE)
35053 smclass = 0;
35054 else if (flags & SECTION_TLS)
35055 smclass = 3;
35056 else if (flags & SECTION_WRITE)
35057 smclass = 2;
35058 else
35059 smclass = 1;
35061 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
35062 (flags & SECTION_CODE) ? "." : "",
35063 name, suffix[smclass], flags & SECTION_ENTSIZE);
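/* A few illustrative directives this can produce (sketches assuming a
   log2 alignment of 2 in the ENTSIZE bits):

	.csect .foo[PR],2	code section "foo"
	.csect bar[RW],2	writable data section "bar"
	.csect baz[TL],2	thread-local section "baz"

   The trailing operand is the log2 alignment that
   rs6000_xcoff_section_type_flags below smuggles through the
   SECTION_ENTSIZE field. */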
35066 #define IN_NAMED_SECTION(DECL) \
35067 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
35068 && DECL_SECTION_NAME (DECL) != NULL)
35070 static section *
35071 rs6000_xcoff_select_section (tree decl, int reloc,
35072 unsigned HOST_WIDE_INT align)
35074 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
35075 a named section. */
35076 if (align > BIGGEST_ALIGNMENT)
35078 resolve_unique_section (decl, reloc, true);
35079 if (IN_NAMED_SECTION (decl))
35080 return get_named_section (decl, NULL, reloc);
35083 if (decl_readonly_section (decl, reloc))
35085 if (TREE_PUBLIC (decl))
35086 return read_only_data_section;
35087 else
35088 return read_only_private_data_section;
35090 else
35092 #if HAVE_AS_TLS
35093 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35095 if (TREE_PUBLIC (decl))
35096 return tls_data_section;
35097 else if (bss_initializer_p (decl))
35099 /* Convert to COMMON to emit in BSS. */
35100 DECL_COMMON (decl) = 1;
35101 return tls_comm_section;
35103 else
35104 return tls_private_data_section;
35106 else
35107 #endif
35108 if (TREE_PUBLIC (decl))
35109 return data_section;
35110 else
35111 return private_data_section;
35115 static void
35116 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
35118 const char *name;
35120 /* Use select_section for private data and uninitialized data with
35121 alignment <= BIGGEST_ALIGNMENT. */
35122 if (!TREE_PUBLIC (decl)
35123 || DECL_COMMON (decl)
35124 || (DECL_INITIAL (decl) == NULL_TREE
35125 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
35126 || DECL_INITIAL (decl) == error_mark_node
35127 || (flag_zero_initialized_in_bss
35128 && initializer_zerop (DECL_INITIAL (decl))))
35129 return;
35131 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35132 name = (*targetm.strip_name_encoding) (name);
35133 set_decl_section_name (decl, name);
35136 /* Select section for constant in constant pool.
35138 On RS/6000, all constants are in the private read-only data area.
35139 However, if this is being placed in the TOC it must be output as a
35140 toc entry. */
35142 static section *
35143 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
35144 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
35146 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35147 return toc_section;
35148 else
35149 return read_only_private_data_section;
35152 /* Remove any trailing [DS] or the like from the symbol name. */
35154 static const char *
35155 rs6000_xcoff_strip_name_encoding (const char *name)
35157 size_t len;
35158 if (*name == '*')
35159 name++;
35160 len = strlen (name);
35161 if (name[len - 1] == ']')
35162 return ggc_alloc_string (name, len - 4);
35163 else
35164 return name;
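/* For example, "foo[DS]" comes back as "foo", and a leading '*' is
   skipped first.  The length arithmetic assumes the bracketed mapping
   class is exactly two characters, which holds for all the classes
   emitted by this file. */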
35167 /* Section attributes. AIX is always PIC. */
35169 static unsigned int
35170 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
35172 unsigned int align;
35173 unsigned int flags = default_section_type_flags (decl, name, reloc);
35175 /* Align to at least UNIT size. */
35176 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
35177 align = MIN_UNITS_PER_WORD;
35178 else
35179 /* Increase alignment of large objects if not already stricter. */
35180 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
35181 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
35182 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
35184 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
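/* Worked example: a decl aligned to 64 bytes and larger than a word gets
   align = MAX (64, UNITS_PER_FP_WORD) = 64, and exact_log2 (64) = 6 is
   folded into the SECTION_ENTSIZE bits of the returned flags, surfacing
   later as the alignment operand on the .csect directive above. */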
35187 /* Output at beginning of assembler file.
35189 Initialize the section names for the RS/6000 at this point.
35191 Specify filename, including full path, to assembler.
35193 We want to go into the TOC section so at least one .toc will be emitted.
35194 Also, in order to output proper .bs/.es pairs, we need at least one static
35195 [RW] section emitted.
35197 Finally, declare mcount when profiling to make the assembler happy. */
35199 static void
35200 rs6000_xcoff_file_start (void)
35202 rs6000_gen_section_name (&xcoff_bss_section_name,
35203 main_input_filename, ".bss_");
35204 rs6000_gen_section_name (&xcoff_private_data_section_name,
35205 main_input_filename, ".rw_");
35206 rs6000_gen_section_name (&xcoff_read_only_section_name,
35207 main_input_filename, ".ro_");
35208 rs6000_gen_section_name (&xcoff_tls_data_section_name,
35209 main_input_filename, ".tls_");
35210 rs6000_gen_section_name (&xcoff_tbss_section_name,
35211 main_input_filename, ".tbss_[UL]");
35213 fputs ("\t.file\t", asm_out_file);
35214 output_quoted_string (asm_out_file, main_input_filename);
35215 fputc ('\n', asm_out_file);
35216 if (write_symbols != NO_DEBUG)
35217 switch_to_section (private_data_section);
35218 switch_to_section (toc_section);
35219 switch_to_section (text_section);
35220 if (profile_flag)
35221 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
35222 rs6000_file_start ();
35225 /* Output at end of assembler file.
35226 On the RS/6000, referencing data should automatically pull in text. */
35228 static void
35229 rs6000_xcoff_file_end (void)
35231 switch_to_section (text_section);
35232 fputs ("_section_.text:\n", asm_out_file);
35233 switch_to_section (data_section);
35234 fputs (TARGET_32BIT
35235 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
35236 asm_out_file);
35239 struct declare_alias_data
35241 FILE *file;
35242 bool function_descriptor;
35245 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
35247 static bool
35248 rs6000_declare_alias (struct symtab_node *n, void *d)
35250 struct declare_alias_data *data = (struct declare_alias_data *)d;
35251 /* The main symbol is output specially, because the varasm machinery does
35252 part of the job for us; we do not need to emit .globl/.lglobl and such. */
35253 if (!n->alias || n->weakref)
35254 return false;
35256 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
35257 return false;
35259 /* Prevent assemble_alias from trying to use .set pseudo operation
35260 that does not behave as expected by the middle-end. */
35261 TREE_ASM_WRITTEN (n->decl) = true;
35263 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
35264 char *buffer = (char *) alloca (strlen (name) + 2);
35265 char *p;
35266 int dollar_inside = 0;
35268 strcpy (buffer, name);
35269 p = strchr (buffer, '$');
35270 while (p) {
35271 *p = '_';
35272 dollar_inside++;
35273 p = strchr (p + 1, '$');
35275 if (TREE_PUBLIC (n->decl))
35277 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
35279 if (dollar_inside) {
35280 if (data->function_descriptor)
35281 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35282 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35284 if (data->function_descriptor)
35286 fputs ("\t.globl .", data->file);
35287 RS6000_OUTPUT_BASENAME (data->file, buffer);
35288 putc ('\n', data->file);
35290 fputs ("\t.globl ", data->file);
35291 RS6000_OUTPUT_BASENAME (data->file, buffer);
35292 putc ('\n', data->file);
35294 #ifdef ASM_WEAKEN_DECL
35295 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
35296 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
35297 #endif
35299 else
35301 if (dollar_inside)
35303 if (data->function_descriptor)
35304 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
35305 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
35307 if (data->function_descriptor)
35309 fputs ("\t.lglobl .", data->file);
35310 RS6000_OUTPUT_BASENAME (data->file, buffer);
35311 putc ('\n', data->file);
35313 fputs ("\t.lglobl ", data->file);
35314 RS6000_OUTPUT_BASENAME (data->file, buffer);
35315 putc ('\n', data->file);
35317 if (data->function_descriptor)
35318 fputs (".", data->file);
35319 RS6000_OUTPUT_BASENAME (data->file, buffer);
35320 fputs (":\n", data->file);
35321 return false;
35324 /* This macro produces the initial definition of a function name.
35325 On the RS/6000, we need to place an extra '.' in the function name and
35326 output the function descriptor.
35327 Dollar signs are converted to underscores.
35329 The csect for the function will have already been created when
35330 text_section was selected. We do have to go back to that csect, however.
35332 The third and fourth parameters to the .function pseudo-op (16 and 044)
35333 are placeholders which no longer have any use.
35335 Because the AIX assembler's .set command has unexpected semantics, we output
35336 all aliases as alternative labels in front of the definition. */
35338 void
35339 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
35341 char *buffer = (char *) alloca (strlen (name) + 1);
35342 char *p;
35343 int dollar_inside = 0;
35344 struct declare_alias_data data = {file, false};
35346 strcpy (buffer, name);
35347 p = strchr (buffer, '$');
35348 while (p) {
35349 *p = '_';
35350 dollar_inside++;
35351 p = strchr (p + 1, '$');
35353 if (TREE_PUBLIC (decl))
35355 if (!RS6000_WEAK || !DECL_WEAK (decl))
35357 if (dollar_inside) {
35358 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35359 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35361 fputs ("\t.globl .", file);
35362 RS6000_OUTPUT_BASENAME (file, buffer);
35363 putc ('\n', file);
35366 else
35368 if (dollar_inside) {
35369 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
35370 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
35372 fputs ("\t.lglobl .", file);
35373 RS6000_OUTPUT_BASENAME (file, buffer);
35374 putc ('\n', file);
35376 fputs ("\t.csect ", file);
35377 RS6000_OUTPUT_BASENAME (file, buffer);
35378 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
35379 RS6000_OUTPUT_BASENAME (file, buffer);
35380 fputs (":\n", file);
35381 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35382 &data, true);
35383 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
35384 RS6000_OUTPUT_BASENAME (file, buffer);
35385 fputs (", TOC[tc0], 0\n", file);
35386 in_section = NULL;
35387 switch_to_section (function_section (decl));
35388 putc ('.', file);
35389 RS6000_OUTPUT_BASENAME (file, buffer);
35390 fputs (":\n", file);
35391 data.function_descriptor = true;
35392 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35393 &data, true);
35394 if (!DECL_IGNORED_P (decl))
35396 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
35397 xcoffout_declare_function (file, decl, buffer);
35398 else if (write_symbols == DWARF2_DEBUG)
35400 name = (*targetm.strip_name_encoding) (name);
35401 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
35404 return;
35407 /* This macro produces the initial definition of an object (variable) name.
35408 Because the AIX assembler's .set command has unexpected semantics, we output
35409 all aliases as alternative labels in front of the definition. */
35411 void
35412 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
35414 struct declare_alias_data data = {file, false};
35415 RS6000_OUTPUT_BASENAME (file, name);
35416 fputs (":\n", file);
35417 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
35418 &data, true);
35421 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
35423 void
35424 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
35426 fputs (integer_asm_op (size, FALSE), file);
35427 assemble_name (file, label);
35428 fputs ("-$", file);
35431 /* Output a symbol offset relative to the dbase for the current object.
35432 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
35433 signed offsets.
35435 __gcc_unwind_dbase is embedded in all executables/libraries through
35436 libgcc/config/rs6000/crtdbase.S. */
35438 void
35439 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
35441 fputs (integer_asm_op (size, FALSE), file);
35442 assemble_name (file, label);
35443 fputs("-__gcc_unwind_dbase", file);
35446 #ifdef HAVE_AS_TLS
35447 static void
35448 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
35450 rtx symbol;
35451 int flags;
35452 const char *symname;
35454 default_encode_section_info (decl, rtl, first);
35456 /* Careful not to prod global register variables. */
35457 if (!MEM_P (rtl))
35458 return;
35459 symbol = XEXP (rtl, 0);
35460 if (GET_CODE (symbol) != SYMBOL_REF)
35461 return;
35463 flags = SYMBOL_REF_FLAGS (symbol);
35465 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
35466 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
35468 SYMBOL_REF_FLAGS (symbol) = flags;
35470 /* Append mapping class to extern decls. */
35471 symname = XSTR (symbol, 0);
35472 if (decl /* sync condition with assemble_external () */
35473 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
35474 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
35475 || TREE_CODE (decl) == FUNCTION_DECL)
35476 && symname[strlen (symname) - 1] != ']')
35478 char *newname = (char *) alloca (strlen (symname) + 5);
35479 strcpy (newname, symname);
35480 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
35481 ? "[DS]" : "[UA]"));
35482 XSTR (symbol, 0) = ggc_strdup (newname);
35485 #endif /* HAVE_AS_TLS */
35486 #endif /* TARGET_XCOFF */
35488 /* Return true if INSN should not be copied. */
35490 static bool
35491 rs6000_cannot_copy_insn_p (rtx_insn *insn)
35493 return recog_memoized (insn) >= 0
35494 && get_attr_cannot_copy (insn);
35497 /* Compute a (partial) cost for rtx X. Return true if the complete
35498 cost has been computed, and false if subexpressions should be
35499 scanned. In either case, *TOTAL contains the cost result. */
35501 static bool
35502 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
35503 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
35505 int code = GET_CODE (x);
35507 switch (code)
35509 /* On the RS/6000, if it is valid in the insn, it is free. */
35510 case CONST_INT:
35511 if (((outer_code == SET
35512 || outer_code == PLUS
35513 || outer_code == MINUS)
35514 && (satisfies_constraint_I (x)
35515 || satisfies_constraint_L (x)))
35516 || (outer_code == AND
35517 && (satisfies_constraint_K (x)
35518 || (mode == SImode
35519 ? satisfies_constraint_L (x)
35520 : satisfies_constraint_J (x))))
35521 || ((outer_code == IOR || outer_code == XOR)
35522 && (satisfies_constraint_K (x)
35523 || (mode == SImode
35524 ? satisfies_constraint_L (x)
35525 : satisfies_constraint_J (x))))
35526 || outer_code == ASHIFT
35527 || outer_code == ASHIFTRT
35528 || outer_code == LSHIFTRT
35529 || outer_code == ROTATE
35530 || outer_code == ROTATERT
35531 || outer_code == ZERO_EXTRACT
35532 || (outer_code == MULT
35533 && satisfies_constraint_I (x))
35534 || ((outer_code == DIV || outer_code == UDIV
35535 || outer_code == MOD || outer_code == UMOD)
35536 && exact_log2 (INTVAL (x)) >= 0)
35537 || (outer_code == COMPARE
35538 && (satisfies_constraint_I (x)
35539 || satisfies_constraint_K (x)))
35540 || ((outer_code == EQ || outer_code == NE)
35541 && (satisfies_constraint_I (x)
35542 || satisfies_constraint_K (x)
35543 || (mode == SImode
35544 ? satisfies_constraint_L (x)
35545 : satisfies_constraint_J (x))))
35546 || (outer_code == GTU
35547 && satisfies_constraint_I (x))
35548 || (outer_code == LTU
35549 && satisfies_constraint_P (x)))
35551 *total = 0;
35552 return true;
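/* For instance, (plus (reg) (const_int 12)) satisfies constraint "I"
   (a signed 16-bit immediate), so the constant is free: it rides along
   inside the addi instruction.  Constants outside the immediate forms
   but still synthesizable around the operation fall into the
   COSTS_N_INSNS (1) branch below. */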
35554 else if ((outer_code == PLUS
35555 && reg_or_add_cint_operand (x, VOIDmode))
35556 || (outer_code == MINUS
35557 && reg_or_sub_cint_operand (x, VOIDmode))
35558 || ((outer_code == SET
35559 || outer_code == IOR
35560 || outer_code == XOR)
35561 && (INTVAL (x)
35562 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
35564 *total = COSTS_N_INSNS (1);
35565 return true;
35567 /* FALLTHRU */
35569 case CONST_DOUBLE:
35570 case CONST_WIDE_INT:
35571 case CONST:
35572 case HIGH:
35573 case SYMBOL_REF:
35574 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35575 return true;
35577 case MEM:
35578 /* When optimizing for size, MEM should be slightly more expensive
35579 than generating an address, e.g., (plus (reg) (const)).
35580 L1 cache latency is about two instructions. */
35581 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
35582 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
35583 *total += COSTS_N_INSNS (100);
35584 return true;
35586 case LABEL_REF:
35587 *total = 0;
35588 return true;
35590 case PLUS:
35591 case MINUS:
35592 if (FLOAT_MODE_P (mode))
35593 *total = rs6000_cost->fp;
35594 else
35595 *total = COSTS_N_INSNS (1);
35596 return false;
35598 case MULT:
35599 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35600 && satisfies_constraint_I (XEXP (x, 1)))
35602 if (INTVAL (XEXP (x, 1)) >= -256
35603 && INTVAL (XEXP (x, 1)) <= 255)
35604 *total = rs6000_cost->mulsi_const9;
35605 else
35606 *total = rs6000_cost->mulsi_const;
35608 else if (mode == SFmode)
35609 *total = rs6000_cost->fp;
35610 else if (FLOAT_MODE_P (mode))
35611 *total = rs6000_cost->dmul;
35612 else if (mode == DImode)
35613 *total = rs6000_cost->muldi;
35614 else
35615 *total = rs6000_cost->mulsi;
35616 return false;
35618 case FMA:
35619 if (mode == SFmode)
35620 *total = rs6000_cost->fp;
35621 else
35622 *total = rs6000_cost->dmul;
35623 break;
35625 case DIV:
35626 case MOD:
35627 if (FLOAT_MODE_P (mode))
35629 *total = mode == DFmode ? rs6000_cost->ddiv
35630 : rs6000_cost->sdiv;
35631 return false;
35633 /* FALLTHRU */
35635 case UDIV:
35636 case UMOD:
35637 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35638 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
35640 if (code == DIV || code == MOD)
35641 /* Shift, addze */
35642 *total = COSTS_N_INSNS (2);
35643 else
35644 /* Shift */
35645 *total = COSTS_N_INSNS (1);
35647 else
35649 if (GET_MODE (XEXP (x, 1)) == DImode)
35650 *total = rs6000_cost->divdi;
35651 else
35652 *total = rs6000_cost->divsi;
35654 /* Add in shift and subtract for MOD unless we have a mod instruction. */
35655 if (!TARGET_MODULO && (code == MOD || code == UMOD))
35656 *total += COSTS_N_INSNS (2);
35657 return false;
35659 case CTZ:
35660 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
35661 return false;
35663 case FFS:
35664 *total = COSTS_N_INSNS (4);
35665 return false;
35667 case POPCOUNT:
35668 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
35669 return false;
35671 case PARITY:
35672 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
35673 return false;
35675 case NOT:
35676 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
35677 *total = 0;
35678 else
35679 *total = COSTS_N_INSNS (1);
35680 return false;
35682 case AND:
35683 if (CONST_INT_P (XEXP (x, 1)))
35685 rtx left = XEXP (x, 0);
35686 rtx_code left_code = GET_CODE (left);
35688 /* rotate-and-mask: 1 insn. */
35689 if ((left_code == ROTATE
35690 || left_code == ASHIFT
35691 || left_code == LSHIFTRT)
35692 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
35694 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
35695 if (!CONST_INT_P (XEXP (left, 1)))
35696 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
35697 *total += COSTS_N_INSNS (1);
35698 return true;
35701 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
35702 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
35703 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
35704 || (val & 0xffff) == val
35705 || (val & 0xffff0000) == val
35706 || ((val & 0xffff) == 0 && mode == SImode))
35708 *total = rtx_cost (left, mode, AND, 0, speed);
35709 *total += COSTS_N_INSNS (1);
35710 return true;
35713 /* 2 insns. */
35714 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
35716 *total = rtx_cost (left, mode, AND, 0, speed);
35717 *total += COSTS_N_INSNS (2);
35718 return true;
35722 *total = COSTS_N_INSNS (1);
35723 return false;
35725 case IOR:
35726 /* FIXME */
35727 *total = COSTS_N_INSNS (1);
35728 return true;
35730 case CLZ:
35731 case XOR:
35732 case ZERO_EXTRACT:
35733 *total = COSTS_N_INSNS (1);
35734 return false;
35736 case ASHIFT:
35737 /* The EXTSWSLI instruction is a combined instruction. Don't count both
35738 the sign extend and shift separately within the insn. */
35739 if (TARGET_EXTSWSLI && mode == DImode
35740 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
35741 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
35743 *total = 0;
35744 return false;
35746 /* fall through */
35748 case ASHIFTRT:
35749 case LSHIFTRT:
35750 case ROTATE:
35751 case ROTATERT:
35752 /* Handle mul_highpart. */
35753 if (outer_code == TRUNCATE
35754 && GET_CODE (XEXP (x, 0)) == MULT)
35756 if (mode == DImode)
35757 *total = rs6000_cost->muldi;
35758 else
35759 *total = rs6000_cost->mulsi;
35760 return true;
35762 else if (outer_code == AND)
35763 *total = 0;
35764 else
35765 *total = COSTS_N_INSNS (1);
35766 return false;
35768 case SIGN_EXTEND:
35769 case ZERO_EXTEND:
35770 if (GET_CODE (XEXP (x, 0)) == MEM)
35771 *total = 0;
35772 else
35773 *total = COSTS_N_INSNS (1);
35774 return false;
35776 case COMPARE:
35777 case NEG:
35778 case ABS:
35779 if (!FLOAT_MODE_P (mode))
35781 *total = COSTS_N_INSNS (1);
35782 return false;
35784 /* FALLTHRU */
35786 case FLOAT:
35787 case UNSIGNED_FLOAT:
35788 case FIX:
35789 case UNSIGNED_FIX:
35790 case FLOAT_TRUNCATE:
35791 *total = rs6000_cost->fp;
35792 return false;
35794 case FLOAT_EXTEND:
35795 if (mode == DFmode)
35796 *total = rs6000_cost->sfdf_convert;
35797 else
35798 *total = rs6000_cost->fp;
35799 return false;
35801 case UNSPEC:
35802 switch (XINT (x, 1))
35804 case UNSPEC_FRSP:
35805 *total = rs6000_cost->fp;
35806 return true;
35808 default:
35809 break;
35811 break;
35813 case CALL:
35814 case IF_THEN_ELSE:
35815 if (!speed)
35817 *total = COSTS_N_INSNS (1);
35818 return true;
35820 else if (FLOAT_MODE_P (mode)
35821 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
35823 *total = rs6000_cost->fp;
35824 return false;
35826 break;
35828 case NE:
35829 case EQ:
35830 case GTU:
35831 case LTU:
35832 /* Carry bit requires mode == Pmode.
35833 NEG or PLUS already counted so only add one. */
35834 if (mode == Pmode
35835 && (outer_code == NEG || outer_code == PLUS))
35837 *total = COSTS_N_INSNS (1);
35838 return true;
35840 if (outer_code == SET)
35842 if (XEXP (x, 1) == const0_rtx)
35844 if (TARGET_ISEL && !TARGET_MFCRF)
35845 *total = COSTS_N_INSNS (8);
35846 else
35847 *total = COSTS_N_INSNS (2);
35848 return true;
35850 else
35852 *total = COSTS_N_INSNS (3);
35853 return false;
35856 /* FALLTHRU */
35858 case GT:
35859 case LT:
35860 case UNORDERED:
35861 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
35863 if (TARGET_ISEL && !TARGET_MFCRF)
35864 *total = COSTS_N_INSNS (8);
35865 else
35866 *total = COSTS_N_INSNS (2);
35867 return true;
35869 /* CC COMPARE. */
35870 if (outer_code == COMPARE)
35872 *total = 0;
35873 return true;
35875 break;
35877 default:
35878 break;
35881 return false;
35884 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
35886 static bool
35887 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
35888 int opno, int *total, bool speed)
35890 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
35892 fprintf (stderr,
35893 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
35894 "opno = %d, total = %d, speed = %s, x:\n",
35895 ret ? "complete" : "scan inner",
35896 GET_MODE_NAME (mode),
35897 GET_RTX_NAME (outer_code),
35898 opno,
35899 *total,
35900 speed ? "true" : "false");
35902 debug_rtx (x);
35904 return ret;
35907 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35909 static int
35910 rs6000_debug_address_cost (rtx x, machine_mode mode,
35911 addr_space_t as, bool speed)
35913 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35915 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35916 ret, speed ? "true" : "false");
35917 debug_rtx (x);
35919 return ret;
35923 /* A C expression returning the cost of moving data from a register of class
35924 CLASS1 to one of CLASS2. */
35926 static int
35927 rs6000_register_move_cost (machine_mode mode,
35928 reg_class_t from, reg_class_t to)
35930 int ret;
35932 if (TARGET_DEBUG_COST)
35933 dbg_cost_ctrl++;
35935 /* Moves from/to GENERAL_REGS. */
35936 if (reg_classes_intersect_p (to, GENERAL_REGS)
35937 || reg_classes_intersect_p (from, GENERAL_REGS))
35939 reg_class_t rclass = from;
35941 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35942 rclass = to;
35944 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35945 ret = (rs6000_memory_move_cost (mode, rclass, false)
35946 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35948 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35949 shift. */
35950 else if (rclass == CR_REGS)
35951 ret = 4;
35953 /* For those processors that have slow LR/CTR moves, make them more
35954 expensive than memory in order to bias spills to memory. */
35955 else if ((rs6000_cpu == PROCESSOR_POWER6
35956 || rs6000_cpu == PROCESSOR_POWER7
35957 || rs6000_cpu == PROCESSOR_POWER8
35958 || rs6000_cpu == PROCESSOR_POWER9)
35959 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35960 ret = 6 * hard_regno_nregs[0][mode];
35962 else
35963 /* A move will cost one instruction per GPR moved. */
35964 ret = 2 * hard_regno_nregs[0][mode];
35967 /* If we have VSX, we can easily move between FPR or Altivec registers. */
35968 else if (VECTOR_MEM_VSX_P (mode)
35969 && reg_classes_intersect_p (to, VSX_REGS)
35970 && reg_classes_intersect_p (from, VSX_REGS))
35971 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
35973 /* Moving between two similar registers is just one instruction. */
35974 else if (reg_classes_intersect_p (to, from))
35975 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35977 /* Everything else has to go through GENERAL_REGS. */
35978 else
35979 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35980 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35982 if (TARGET_DEBUG_COST)
35984 if (dbg_cost_ctrl == 1)
35985 fprintf (stderr,
35986 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35987 ret, GET_MODE_NAME (mode), reg_class_names[from],
35988 reg_class_names[to]);
35989 dbg_cost_ctrl--;
35992 return ret;
35995 /* A C expression returning the cost of moving data of MODE between a
35996 register and memory. */
35998 static int
35999 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
36000 bool in ATTRIBUTE_UNUSED)
36002 int ret;
36004 if (TARGET_DEBUG_COST)
36005 dbg_cost_ctrl++;
36007 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
36008 ret = 4 * hard_regno_nregs[0][mode];
36009 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
36010 || reg_classes_intersect_p (rclass, VSX_REGS)))
36011 ret = 4 * hard_regno_nregs[32][mode];
36012 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
36013 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
36014 else
36015 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
36017 if (TARGET_DEBUG_COST)
36019 if (dbg_cost_ctrl == 1)
36020 fprintf (stderr,
36021 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
36022 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
36023 dbg_cost_ctrl--;
36026 return ret;
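/* A sketch of how the two cost hooks compose, following the code above:
   a DImode CR_REGS <-> memory move has no direct path, so the cost is
   4 + rs6000_register_move_cost (DImode, CR_REGS, GENERAL_REGS)
   = 4 + 4 = 8, i.e. a GPR <-> memory transfer plus the mfcr/mtcrf
   style shuffle through a GPR. */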
36029 /* Returns a code for a target-specific builtin that implements
36030 reciprocal of the function, or NULL_TREE if not available. */
36032 static tree
36033 rs6000_builtin_reciprocal (tree fndecl)
36035 switch (DECL_FUNCTION_CODE (fndecl))
36037 case VSX_BUILTIN_XVSQRTDP:
36038 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
36039 return NULL_TREE;
36041 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
36043 case VSX_BUILTIN_XVSQRTSP:
36044 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
36045 return NULL_TREE;
36047 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
36049 default:
36050 return NULL_TREE;
36054 /* Load up a constant. If the mode is a vector mode, splat the value across
36055 all of the vector elements. */
36057 static rtx
36058 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
36060 rtx reg;
36062 if (mode == SFmode || mode == DFmode)
36064 rtx d = const_double_from_real_value (dconst, mode);
36065 reg = force_reg (mode, d);
36067 else if (mode == V4SFmode)
36069 rtx d = const_double_from_real_value (dconst, SFmode);
36070 rtvec v = gen_rtvec (4, d, d, d, d);
36071 reg = gen_reg_rtx (mode);
36072 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36074 else if (mode == V2DFmode)
36076 rtx d = const_double_from_real_value (dconst, DFmode);
36077 rtvec v = gen_rtvec (2, d, d);
36078 reg = gen_reg_rtx (mode);
36079 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
36081 else
36082 gcc_unreachable ();
36084 return reg;
36087 /* Generate an FMA instruction. */
36089 static void
36090 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
36092 machine_mode mode = GET_MODE (target);
36093 rtx dst;
36095 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
36096 gcc_assert (dst != NULL);
36098 if (dst != target)
36099 emit_move_insn (target, dst);
36102 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
36104 static void
36105 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
36107 machine_mode mode = GET_MODE (dst);
36108 rtx r;
36110 /* This is a tad more complicated, since the fnma_optab is for
36111 a different expression: fma(-m1, m2, a), which is the same
36112 thing except in the case of signed zeros.
36114 Fortunately we know that if FMA is supported that FNMSUB is
36115 also supported in the ISA. Just expand it directly. */
36117 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
36119 r = gen_rtx_NEG (mode, a);
36120 r = gen_rtx_FMA (mode, m1, m2, r);
36121 r = gen_rtx_NEG (mode, r);
36122 emit_insn (gen_rtx_SET (dst, r));
36125 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
36126 add a reg_note saying that this was a division. Support both scalar and
36127 vector divide. Assumes no trapping math and finite arguments. */
36129 void
36130 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
36132 machine_mode mode = GET_MODE (dst);
36133 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
36134 int i;
36136 /* Low precision estimates guarantee 5 bits of accuracy. High
36137 precision estimates guarantee 14 bits of accuracy. SFmode
36138 requires 23 bits of accuracy. DFmode requires 52 bits of
36139 accuracy. Each pass at least doubles the accuracy, leading
36140 to the following. */
36141 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36142 if (mode == DFmode || mode == V2DFmode)
36143 passes++;
36145 enum insn_code code = optab_handler (smul_optab, mode);
36146 insn_gen_fn gen_mul = GEN_FCN (code);
36148 gcc_assert (code != CODE_FOR_nothing);
36150 one = rs6000_load_constant_and_splat (mode, dconst1);
36152 /* x0 = 1./d estimate */
36153 x0 = gen_reg_rtx (mode);
36154 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
36155 UNSPEC_FRES)));
36157 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
36158 if (passes > 1) {
36160 /* e0 = 1. - d * x0 */
36161 e0 = gen_reg_rtx (mode);
36162 rs6000_emit_nmsub (e0, d, x0, one);
36164 /* x1 = x0 + e0 * x0 */
36165 x1 = gen_reg_rtx (mode);
36166 rs6000_emit_madd (x1, e0, x0, x0);
36168 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
36169 ++i, xprev = xnext, eprev = enext) {
36171 /* enext = eprev * eprev */
36172 enext = gen_reg_rtx (mode);
36173 emit_insn (gen_mul (enext, eprev, eprev));
36175 /* xnext = xprev + enext * xprev */
36176 xnext = gen_reg_rtx (mode);
36177 rs6000_emit_madd (xnext, enext, xprev, xprev);
36180 } else
36181 xprev = x0;
36183 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
36185 /* u = n * xprev */
36186 u = gen_reg_rtx (mode);
36187 emit_insn (gen_mul (u, n, xprev));
36189 /* v = n - (d * u) */
36190 v = gen_reg_rtx (mode);
36191 rs6000_emit_nmsub (v, d, u, n);
36193 /* dst = (v * xprev) + u */
36194 rs6000_emit_madd (dst, v, xprev, u);
36196 if (note_p)
36197 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
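/* Worked example of the pass count: low-precision estimates start at
   5 bits and each pass at least doubles the accuracy, so SFmode needs
   3 passes (5 -> 10 -> 20 -> 40 >= 23 bits) and DFmode one more
   (-> 80 >= 52 bits).  With TARGET_RECIP_PRECISION the 14-bit estimate
   needs only 1 pass for SFmode (28 >= 23) and 2 for DFmode (56 >= 52). */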
36200 /* Goldschmidt's Algorithm for single/double-precision floating point
36201 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
36203 void
36204 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
36206 machine_mode mode = GET_MODE (src);
36207 rtx e = gen_reg_rtx (mode);
36208 rtx g = gen_reg_rtx (mode);
36209 rtx h = gen_reg_rtx (mode);
36211 /* Low precision estimates guarantee 5 bits of accuracy. High
36212 precision estimates guarantee 14 bits of accuracy. SFmode
36213 requires 23 bits of accuracy. DFmode requires 52 bits of
36214 accuracy. Each pass at least doubles the accuracy, leading
36215 to the following. */
36216 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
36217 if (mode == DFmode || mode == V2DFmode)
36218 passes++;
36220 int i;
36221 rtx mhalf;
36222 enum insn_code code = optab_handler (smul_optab, mode);
36223 insn_gen_fn gen_mul = GEN_FCN (code);
36225 gcc_assert (code != CODE_FOR_nothing);
36227 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
36229 /* e = rsqrt estimate */
36230 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
36231 UNSPEC_RSQRT)));
36233 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
36234 if (!recip)
36236 rtx zero = force_reg (mode, CONST0_RTX (mode));
36238 if (mode == SFmode)
36240 rtx target = emit_conditional_move (e, GT, src, zero, mode,
36241 e, zero, mode, 0);
36242 if (target != e)
36243 emit_move_insn (e, target);
36245 else
36247 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
36248 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
36252 /* g = sqrt estimate. */
36253 emit_insn (gen_mul (g, e, src));
36254 /* h = 1/(2*sqrt) estimate. */
36255 emit_insn (gen_mul (h, e, mhalf));
36257 if (recip)
36259 if (passes == 1)
36261 rtx t = gen_reg_rtx (mode);
36262 rs6000_emit_nmsub (t, g, h, mhalf);
36263 /* Apply correction directly to 1/rsqrt estimate. */
36264 rs6000_emit_madd (dst, e, t, e);
36266 else
36268 for (i = 0; i < passes; i++)
36270 rtx t1 = gen_reg_rtx (mode);
36271 rtx g1 = gen_reg_rtx (mode);
36272 rtx h1 = gen_reg_rtx (mode);
36274 rs6000_emit_nmsub (t1, g, h, mhalf);
36275 rs6000_emit_madd (g1, g, t1, g);
36276 rs6000_emit_madd (h1, h, t1, h);
36278 g = g1;
36279 h = h1;
36281 /* Multiply by 2 for 1/rsqrt. */
36282 emit_insn (gen_add3_insn (dst, h, h));
36285 else
36287 rtx t = gen_reg_rtx (mode);
36288 rs6000_emit_nmsub (t, g, h, mhalf);
36289 rs6000_emit_madd (dst, g, t, g);
36292 return;
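/* The loop above is the coupled Goldschmidt iteration; written as a
   recurrence (matching the fnmsub/fmadd calls):

	t_i     = 1/2 - g_i * h_i
	g_(i+1) = g_i + g_i * t_i	(converges to sqrt (src))
	h_(i+1) = h_i + h_i * t_i	(converges to 1 / (2 * sqrt (src)))

   seeded with g_0 = src * e and h_0 = e / 2 from the rsqrt estimate e;
   the final g is the sqrt result, and doubling the final h yields
   rsqrt for the reciprocal case. */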
36295 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
36296 (Power7) targets. DST is the target, and SRC is the argument operand. */
36298 void
36299 rs6000_emit_popcount (rtx dst, rtx src)
36301 machine_mode mode = GET_MODE (dst);
36302 rtx tmp1, tmp2;
36304 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
36305 if (TARGET_POPCNTD)
36307 if (mode == SImode)
36308 emit_insn (gen_popcntdsi2 (dst, src));
36309 else
36310 emit_insn (gen_popcntddi2 (dst, src));
36311 return;
36314 tmp1 = gen_reg_rtx (mode);
36316 if (mode == SImode)
36318 emit_insn (gen_popcntbsi2 (tmp1, src));
36319 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
36320 NULL_RTX, 0);
36321 tmp2 = force_reg (SImode, tmp2);
36322 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
36324 else
36326 emit_insn (gen_popcntbdi2 (tmp1, src));
36327 tmp2 = expand_mult (DImode, tmp1,
36328 GEN_INT ((HOST_WIDE_INT)
36329 0x01010101 << 32 | 0x01010101),
36330 NULL_RTX, 0);
36331 tmp2 = force_reg (DImode, tmp2);
36332 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
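/* Worked example of the popcntb fallback for SImode: popcntb leaves a
   per-byte population count in each byte of tmp1; multiplying by
   0x01010101 accumulates those counts into the most significant byte
   (no carries are possible since each count is at most 8), and the
   shift right by 24 extracts the total.  The DImode variant is the same
   trick with a 64-bit multiplier and a shift by 56. */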
36337 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
36338 target, and SRC is the argument operand. */
36340 void
36341 rs6000_emit_parity (rtx dst, rtx src)
36343 machine_mode mode = GET_MODE (dst);
36344 rtx tmp;
36346 tmp = gen_reg_rtx (mode);
36348 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
36349 if (TARGET_CMPB)
36351 if (mode == SImode)
36353 emit_insn (gen_popcntbsi2 (tmp, src));
36354 emit_insn (gen_paritysi2_cmpb (dst, tmp));
36356 else
36358 emit_insn (gen_popcntbdi2 (tmp, src));
36359 emit_insn (gen_paritydi2_cmpb (dst, tmp));
36361 return;
36364 if (mode == SImode)
36366 /* Is mult+shift >= shift+xor+shift+xor? */
36367 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
36369 rtx tmp1, tmp2, tmp3, tmp4;
36371 tmp1 = gen_reg_rtx (SImode);
36372 emit_insn (gen_popcntbsi2 (tmp1, src));
36374 tmp2 = gen_reg_rtx (SImode);
36375 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
36376 tmp3 = gen_reg_rtx (SImode);
36377 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
36379 tmp4 = gen_reg_rtx (SImode);
36380 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
36381 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
36383 else
36384 rs6000_emit_popcount (tmp, src);
36385 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
36387 else
36389 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
36390 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
36392 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
36394 tmp1 = gen_reg_rtx (DImode);
36395 emit_insn (gen_popcntbdi2 (tmp1, src));
36397 tmp2 = gen_reg_rtx (DImode);
36398 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
36399 tmp3 = gen_reg_rtx (DImode);
36400 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
36402 tmp4 = gen_reg_rtx (DImode);
36403 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
36404 tmp5 = gen_reg_rtx (DImode);
36405 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
36407 tmp6 = gen_reg_rtx (DImode);
36408 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
36409 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
36411 else
36412 rs6000_emit_popcount (tmp, src);
36413 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
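/* Sketch of the shift/xor fallback for SImode: parity is invariant
   under folding with xor, so after popcntb we xor the high halfword
   onto the low (shift 16, xor), then the next byte down (shift 8, xor),
   leaving the parity of the whole word in bit 0 of the low byte; the
   final "and" with 1 extracts it. */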
36417 /* Expand an Altivec constant permutation for little endian mode.
36418 There are two issues: First, the two input operands must be
36419 swapped so that together they form a double-wide array in LE
36420 order. Second, the vperm instruction has surprising behavior
36421 in LE mode: it interprets the elements of the source vectors
36422 in BE mode ("left to right") and interprets the elements of
36423 the destination vector in LE mode ("right to left"). To
36424 correct for this, we must subtract each element of the permute
36425 control vector from 31.
36427 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
36428 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
36429 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
36430 serve as the permute control vector. Then, in BE mode,
36432 vperm 9,10,11,12
36434 places the desired result in vr9. However, in LE mode the
36435 vector contents will be
36437 vr10 = 00000003 00000002 00000001 00000000
36438 vr11 = 00000007 00000006 00000005 00000004
36440 The result of the vperm using the same permute control vector is
36442 vr9 = 05000000 07000000 01000000 03000000
36444 That is, the leftmost 4 bytes of vr10 are interpreted as the
36445 source for the rightmost 4 bytes of vr9, and so on.
36447 If we change the permute control vector to
36449 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
36451 and issue
36453 vperm 9,11,10,12
36455 we get the desired
36457 vr9 = 00000006 00000004 00000002 00000000. */
36459 void
36460 altivec_expand_vec_perm_const_le (rtx operands[4])
36462 unsigned int i;
36463 rtx perm[16];
36464 rtx constv, unspec;
36465 rtx target = operands[0];
36466 rtx op0 = operands[1];
36467 rtx op1 = operands[2];
36468 rtx sel = operands[3];
36470 /* Unpack and adjust the constant selector. */
36471 for (i = 0; i < 16; ++i)
36473 rtx e = XVECEXP (sel, 0, i);
36474 unsigned int elt = 31 - (INTVAL (e) & 31);
36475 perm[i] = GEN_INT (elt);
36478 /* Expand to a permute, swapping the inputs and using the
36479 adjusted selector. */
36480 if (!REG_P (op0))
36481 op0 = force_reg (V16QImode, op0);
36482 if (!REG_P (op1))
36483 op1 = force_reg (V16QImode, op1);
36485 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
36486 constv = force_reg (V16QImode, constv);
36487 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
36488 UNSPEC_VPERM);
36489 if (!REG_P (target))
36491 rtx tmp = gen_reg_rtx (V16QImode);
36492 emit_move_insn (tmp, unspec);
36493 unspec = tmp;
36496 emit_move_insn (target, unspec);
36499 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
36500 permute control vector. But here it's not a constant, so we must
36501 generate a vector NAND or NOR to do the adjustment. */
36503 void
36504 altivec_expand_vec_perm_le (rtx operands[4])
36506 rtx notx, iorx, unspec;
36507 rtx target = operands[0];
36508 rtx op0 = operands[1];
36509 rtx op1 = operands[2];
36510 rtx sel = operands[3];
36511 rtx tmp = target;
36512 rtx norreg = gen_reg_rtx (V16QImode);
36513 machine_mode mode = GET_MODE (target);
36515 /* Get everything in regs so the pattern matches. */
36516 if (!REG_P (op0))
36517 op0 = force_reg (mode, op0);
36518 if (!REG_P (op1))
36519 op1 = force_reg (mode, op1);
36520 if (!REG_P (sel))
36521 sel = force_reg (V16QImode, sel);
36522 if (!REG_P (target))
36523 tmp = gen_reg_rtx (mode);
36525 if (TARGET_P9_VECTOR)
36527 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
36528 UNSPEC_VPERMR);
36530 else
36532 /* Invert the selector with a VNAND if available, else a VNOR.
36533 The VNAND is preferred for future fusion opportunities. */
36534 notx = gen_rtx_NOT (V16QImode, sel);
36535 iorx = (TARGET_P8_VECTOR
36536 ? gen_rtx_IOR (V16QImode, notx, notx)
36537 : gen_rtx_AND (V16QImode, notx, notx));
36538 emit_insn (gen_rtx_SET (norreg, iorx));
36540 /* Permute with operands reversed and adjusted selector. */
36541 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
36542 UNSPEC_VPERM);
36545 /* Copy into target, possibly by way of a register. */
36546 if (!REG_P (target))
36548 emit_move_insn (tmp, unspec);
36549 unspec = tmp;
36552 emit_move_insn (target, unspec);
36555 /* Expand an Altivec constant permutation. Return true if we match
36556 an efficient implementation; false to fall back to VPERM. */
36558 bool
36559 altivec_expand_vec_perm_const (rtx operands[4])
36561 struct altivec_perm_insn {
36562 HOST_WIDE_INT mask;
36563 enum insn_code impl;
36564 unsigned char perm[16];
36566 static const struct altivec_perm_insn patterns[] = {
36567 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
36568 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
36569 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
36570 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
36571 { OPTION_MASK_ALTIVEC,
36572 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
36573 : CODE_FOR_altivec_vmrglb_direct),
36574 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
36575 { OPTION_MASK_ALTIVEC,
36576 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
36577 : CODE_FOR_altivec_vmrglh_direct),
36578 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
36579 { OPTION_MASK_ALTIVEC,
36580 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
36581 : CODE_FOR_altivec_vmrglw_direct),
36582 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
36583 { OPTION_MASK_ALTIVEC,
36584 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
36585 : CODE_FOR_altivec_vmrghb_direct),
36586 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
36587 { OPTION_MASK_ALTIVEC,
36588 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
36589 : CODE_FOR_altivec_vmrghh_direct),
36590 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
36591 { OPTION_MASK_ALTIVEC,
36592 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
36593 : CODE_FOR_altivec_vmrghw_direct),
36594 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
36595 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
36596 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
36597 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
36598 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
36601 unsigned int i, j, elt, which;
36602 unsigned char perm[16];
36603 rtx target, op0, op1, sel, x;
36604 bool one_vec;
36606 target = operands[0];
36607 op0 = operands[1];
36608 op1 = operands[2];
36609 sel = operands[3];
36611 /* Unpack the constant selector. */
36612 for (i = which = 0; i < 16; ++i)
36614 rtx e = XVECEXP (sel, 0, i);
36615 elt = INTVAL (e) & 31;
36616 which |= (elt < 16 ? 1 : 2);
36617 perm[i] = elt;
36620 /* Simplify the constant selector based on operands. */
36621 switch (which)
36623 default:
36624 gcc_unreachable ();
36626 case 3:
36627 one_vec = false;
36628 if (!rtx_equal_p (op0, op1))
36629 break;
36630 /* FALLTHRU */
36632 case 2:
36633 for (i = 0; i < 16; ++i)
36634 perm[i] &= 15;
36635 op0 = op1;
36636 one_vec = true;
36637 break;
36639 case 1:
36640 op1 = op0;
36641 one_vec = true;
36642 break;
36645 /* Look for splat patterns. */
36646 if (one_vec)
36648 elt = perm[0];
36650 for (i = 0; i < 16; ++i)
36651 if (perm[i] != elt)
36652 break;
36653 if (i == 16)
36655 if (!BYTES_BIG_ENDIAN)
36656 elt = 15 - elt;
36657 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
36658 return true;
36661 if (elt % 2 == 0)
36663 for (i = 0; i < 16; i += 2)
36664 if (perm[i] != elt || perm[i + 1] != elt + 1)
36665 break;
36666 if (i == 16)
36668 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
36669 x = gen_reg_rtx (V8HImode);
36670 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
36671 GEN_INT (field)));
36672 emit_move_insn (target, gen_lowpart (V16QImode, x));
36673 return true;
36677 if (elt % 4 == 0)
36679 for (i = 0; i < 16; i += 4)
36680 if (perm[i] != elt
36681 || perm[i + 1] != elt + 1
36682 || perm[i + 2] != elt + 2
36683 || perm[i + 3] != elt + 3)
36684 break;
36685 if (i == 16)
36687 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
36688 x = gen_reg_rtx (V4SImode);
36689 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
36690 GEN_INT (field)));
36691 emit_move_insn (target, gen_lowpart (V16QImode, x));
36692 return true;
36697 /* Look for merge and pack patterns. */
36698 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
36700 bool swapped;
36702 if ((patterns[j].mask & rs6000_isa_flags) == 0)
36703 continue;
36705 elt = patterns[j].perm[0];
36706 if (perm[0] == elt)
36707 swapped = false;
36708 else if (perm[0] == elt + 16)
36709 swapped = true;
36710 else
36711 continue;
36712 for (i = 1; i < 16; ++i)
36714 elt = patterns[j].perm[i];
36715 if (swapped)
36716 elt = (elt >= 16 ? elt - 16 : elt + 16);
36717 else if (one_vec && elt >= 16)
36718 elt -= 16;
36719 if (perm[i] != elt)
36720 break;
36722 if (i == 16)
36724 enum insn_code icode = patterns[j].impl;
36725 machine_mode omode = insn_data[icode].operand[0].mode;
36726 machine_mode imode = insn_data[icode].operand[1].mode;
36728 /* For little-endian, don't use vpkuwum and vpkuhum if the
36729 underlying vector type is not V4SI and V8HI, respectively.
36730 For example, using vpkuwum with a V8HI picks up the even
36731 halfwords (BE numbering) when the even halfwords (LE
36732 numbering) are what we need. */
36733 if (!BYTES_BIG_ENDIAN
36734 && icode == CODE_FOR_altivec_vpkuwum_direct
36735 && ((GET_CODE (op0) == REG
36736 && GET_MODE (op0) != V4SImode)
36737 || (GET_CODE (op0) == SUBREG
36738 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
36739 continue;
36740 if (!BYTES_BIG_ENDIAN
36741 && icode == CODE_FOR_altivec_vpkuhum_direct
36742 && ((GET_CODE (op0) == REG
36743 && GET_MODE (op0) != V8HImode)
36744 || (GET_CODE (op0) == SUBREG
36745 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
36746 continue;
36748 /* For little-endian, the two input operands must be swapped
36749 (or swapped back) to ensure proper right-to-left numbering
36750 from 0 to 2N-1. */
36751 if (swapped ^ !BYTES_BIG_ENDIAN)
36752 std::swap (op0, op1);
36753 if (imode != V16QImode)
36755 op0 = gen_lowpart (imode, op0);
36756 op1 = gen_lowpart (imode, op1);
36758 if (omode == V16QImode)
36759 x = target;
36760 else
36761 x = gen_reg_rtx (omode);
36762 emit_insn (GEN_FCN (icode) (x, op0, op1));
36763 if (omode != V16QImode)
36764 emit_move_insn (target, gen_lowpart (V16QImode, x));
36765 return true;
36769 if (!BYTES_BIG_ENDIAN)
36771 altivec_expand_vec_perm_const_le (operands);
36772 return true;
36775 return false;
36778 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
36779 Return true if we match an efficient implementation. */
36781 static bool
36782 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
36783 unsigned char perm0, unsigned char perm1)
36785 rtx x;
36787 /* If both selectors come from the same operand, fold to single op. */
36788 if ((perm0 & 2) == (perm1 & 2))
36790 if (perm0 & 2)
36791 op0 = op1;
36792 else
36793 op1 = op0;
36795 /* If both operands are equal, fold to simpler permutation. */
36796 if (rtx_equal_p (op0, op1))
36798 perm0 = perm0 & 1;
36799 perm1 = (perm1 & 1) + 2;
36801 /* If the first selector comes from the second operand, swap. */
36802 else if (perm0 & 2)
36804 if (perm1 & 2)
36805 return false;
36806 perm0 -= 2;
36807 perm1 += 2;
36808 std::swap (op0, op1);
36810 /* If the second selector does not come from the second operand, fail. */
36811 else if ((perm1 & 2) == 0)
36812 return false;
36814 /* Success! */
36815 if (target != NULL)
36817 machine_mode vmode, dmode;
36818 rtvec v;
36820 vmode = GET_MODE (target);
36821 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
36822 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
36823 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
36824 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
36825 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
36826 emit_insn (gen_rtx_SET (target, x));
36828 return true;
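/* Worked example (illustrative only, not from the upstream source): for a
   V2DF permute with selector { 1, 3 }, neither fold above applies, so the
   success path emits

     (set target (vec_select:V2DF (vec_concat:V4DF op0 op1)
                                  (parallel [(const_int 1) (const_int 3)])))

   i.e. element 1 of op0 next to element 1 of op1, which later matching can
   typically implement as a single xxpermdi on VSX.  */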
36831 bool
36832 rs6000_expand_vec_perm_const (rtx operands[4])
36834 rtx target, op0, op1, sel;
36835 unsigned char perm0, perm1;
36837 target = operands[0];
36838 op0 = operands[1];
36839 op1 = operands[2];
36840 sel = operands[3];
36842 /* Unpack the constant selector. */
36843 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
36844 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
36846 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
36849 /* Test whether a constant permutation is supported. */
36851 static bool
36852 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
36853 const unsigned char *sel)
36855 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
36856 if (TARGET_ALTIVEC)
36857 return true;
36859 /* Check for ps_merge* or evmerge* insns. */
36860 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
36861 || (TARGET_SPE && vmode == V2SImode))
36863 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
36864 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
36865 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
36868 return false;
36871 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
36873 static void
36874 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36875 machine_mode vmode, unsigned nelt, rtx perm[])
36877 machine_mode imode;
36878 rtx x;
36880 imode = vmode;
36881 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
36883 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
36884 imode = mode_for_vector (imode, nelt);
36887 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
36888 x = expand_vec_perm (vmode, op0, op1, x, target);
36889 if (x != target)
36890 emit_move_insn (target, x);
36893 /* Expand an extract even operation. */
36895 void
36896 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36898 machine_mode vmode = GET_MODE (target);
36899 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36900 rtx perm[16];
36902 for (i = 0; i < nelt; i++)
36903 perm[i] = GEN_INT (i * 2);
36905 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
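/* For instance, with V4SI operands (nelt == 4) the loop above builds the
   selector { 0, 2, 4, 6 }: elements 0 and 2 of op0 followed by elements
   0 and 2 of op1.  */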
36908 /* Expand a vector interleave operation. */
36910 void
36911 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36913 machine_mode vmode = GET_MODE (target);
36914 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36915 rtx perm[16];
36917 high = (highp ? 0 : nelt / 2);
36918 for (i = 0; i < nelt / 2; i++)
36920 perm[i * 2] = GEN_INT (i + high);
36921 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
36924 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
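/* For instance, with V4SI operands (nelt == 4) this builds the selector
   { 0, 4, 1, 5 } when HIGHP is true and { 2, 6, 3, 7 } when it is false,
   interleaving one half of op0 with the matching half of op1.  */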
36927 /* Scale a V2DF vector SRC by two raised to the power SCALE and place the result in TGT.  */
36928 void
36929 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36931 HOST_WIDE_INT hwi_scale (scale);
36932 REAL_VALUE_TYPE r_pow;
36933 rtvec v = rtvec_alloc (2);
36934 rtx elt;
36935 rtx scale_vec = gen_reg_rtx (V2DFmode);
36936 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36937 elt = const_double_from_real_value (r_pow, DFmode);
36938 RTVEC_ELT (v, 0) = elt;
36939 RTVEC_ELT (v, 1) = elt;
36940 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36941 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
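/* E.g. rs6000_scale_v2df (tgt, src, 3) materializes the constant vector
   { 8.0, 8.0 } and emits a single vector multiply, so each lane of TGT is
   the corresponding lane of SRC times 2**3.  */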
36944 /* Return an RTX representing where to find the function value of a
36945 function returning MODE. */
36946 static rtx
36947 rs6000_complex_function_value (machine_mode mode)
36949 unsigned int regno;
36950 rtx r1, r2;
36951 machine_mode inner = GET_MODE_INNER (mode);
36952 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36954 if (TARGET_FLOAT128_TYPE
36955 && (mode == KCmode
36956 || (mode == TCmode && TARGET_IEEEQUAD)))
36957 regno = ALTIVEC_ARG_RETURN;
36959 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36960 regno = FP_ARG_RETURN;
36962 else
36964 regno = GP_ARG_RETURN;
36966 /* 32-bit is OK since it'll go in r3/r4. */
36967 if (TARGET_32BIT && inner_bytes >= 4)
36968 return gen_rtx_REG (mode, regno);
36971 if (inner_bytes >= 8)
36972 return gen_rtx_REG (mode, regno);
36974 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36975 const0_rtx);
36976 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36977 GEN_INT (inner_bytes));
36978 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
36981 /* Return an rtx describing a return value of MODE as a PARALLEL
36982 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36983 stride REG_STRIDE. */
36985 static rtx
36986 rs6000_parallel_return (machine_mode mode,
36987 int n_elts, machine_mode elt_mode,
36988 unsigned int regno, unsigned int reg_stride)
36990 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36992 int i;
36993 for (i = 0; i < n_elts; i++)
36995 rtx r = gen_rtx_REG (elt_mode, regno);
36996 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36997 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36998 regno += reg_stride;
37001 return par;
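/* Illustrative call (editor's example):

     rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)

   describes a DImode value split across two SImode registers starting at
   GP_ARG_RETURN (r3 on this port), at byte offsets 0 and 4.  */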
37004 /* Target hook for TARGET_FUNCTION_VALUE.
37006 On the SPE, both FPs and vectors are returned in r3.
37008 On RS/6000 an integer value is in r3 and a floating-point value is in
37009 fp1, unless -msoft-float. */
37011 static rtx
37012 rs6000_function_value (const_tree valtype,
37013 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
37014 bool outgoing ATTRIBUTE_UNUSED)
37016 machine_mode mode;
37017 unsigned int regno;
37018 machine_mode elt_mode;
37019 int n_elts;
37021 /* Special handling for structs in darwin64. */
37022 if (TARGET_MACHO
37023 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
37025 CUMULATIVE_ARGS valcum;
37026 rtx valret;
37028 valcum.words = 0;
37029 valcum.fregno = FP_ARG_MIN_REG;
37030 valcum.vregno = ALTIVEC_ARG_MIN_REG;
37031 /* Do a trial code generation as if this were going to be passed as
37032 an argument; if any part goes in memory, we return NULL. */
37033 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
37034 if (valret)
37035 return valret;
37036 /* Otherwise fall through to standard ABI rules. */
37039 mode = TYPE_MODE (valtype);
37041 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
37042 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
37044 int first_reg, n_regs;
37046 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
37048 /* _Decimal128 must use even/odd register pairs. */
37049 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37050 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
37052 else
37054 first_reg = ALTIVEC_ARG_RETURN;
37055 n_regs = 1;
37058 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
37061 /* Some return value types need to be split when using -mpowerpc64 with the 32-bit ABI.  */
37062 if (TARGET_32BIT && TARGET_POWERPC64)
37063 switch (mode)
37065 default:
37066 break;
37067 case DImode:
37068 case SCmode:
37069 case DCmode:
37070 case TCmode:
37071 int count = GET_MODE_SIZE (mode) / 4;
37072 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
37075 if ((INTEGRAL_TYPE_P (valtype)
37076 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
37077 || POINTER_TYPE_P (valtype))
37078 mode = TARGET_32BIT ? SImode : DImode;
37080 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37081 /* _Decimal128 must use an even/odd register pair. */
37082 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37083 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
37084 && !FLOAT128_VECTOR_P (mode)
37085 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
37086 regno = FP_ARG_RETURN;
37087 else if (TREE_CODE (valtype) == COMPLEX_TYPE
37088 && targetm.calls.split_complex_arg)
37089 return rs6000_complex_function_value (mode);
37090 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37091 return register is used in both cases, and we won't see V2DImode/V2DFmode
37092 for pure altivec, combine the two cases. */
37093 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
37094 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
37095 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37096 regno = ALTIVEC_ARG_RETURN;
37097 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37098 && (mode == DFmode || mode == DCmode
37099 || FLOAT128_IBM_P (mode) || mode == TCmode))
37100 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37101 else
37102 regno = GP_ARG_RETURN;
37104 return gen_rtx_REG (mode, regno);
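/* For example, under the ELFv2 ABI a homogeneous aggregate of four doubles
   is discovered above with elt_mode == DFmode and n_elts == 4, and is
   returned as a PARALLEL of four consecutive FP registers starting at
   FP_ARG_RETURN.  */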
37107 /* Define how to find the value returned by a library function
37108 assuming the value has mode MODE. */
37109 rtx
37110 rs6000_libcall_value (machine_mode mode)
37112 unsigned int regno;
37114 /* A long long return value needs to be split when using -mpowerpc64 with the 32-bit ABI.  */
37115 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
37116 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
37118 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
37119 /* _Decimal128 must use an even/odd register pair. */
37120 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
37121 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
37122 && TARGET_HARD_FLOAT && TARGET_FPRS
37123 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
37124 regno = FP_ARG_RETURN;
37125 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
37126 return register is used in both cases, and we won't see V2DImode/V2DFmode
37127 for pure altivec, combine the two cases. */
37128 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
37129 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
37130 regno = ALTIVEC_ARG_RETURN;
37131 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
37132 return rs6000_complex_function_value (mode);
37133 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
37134 && (mode == DFmode || mode == DCmode
37135 || FLOAT128_IBM_P (mode) || mode == TCmode))
37136 return spe_build_register_parallel (mode, GP_ARG_RETURN);
37137 else
37138 regno = GP_ARG_RETURN;
37140 return gen_rtx_REG (mode, regno);
37144 /* Return true if we use LRA instead of the reload pass.  */
37145 static bool
37146 rs6000_lra_p (void)
37148 return TARGET_LRA;
37151 /* Given FROM and TO register numbers, say whether this elimination is allowed.
37152 Frame pointer elimination is automatically handled.
37154 For the RS/6000, if frame pointer elimination is being done, we would like
37155 to convert ap into fp, not sp.
37157 We need r30 if -mminimal-toc was specified, and there are constant pool
37158 references. */
37160 static bool
37161 rs6000_can_eliminate (const int from, const int to)
37163 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
37164 ? ! frame_pointer_needed
37165 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
37166 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
37167 : true);
37170 /* Define the offset between two registers, FROM to be eliminated and its
37171 replacement TO, at the start of a routine. */
37172 HOST_WIDE_INT
37173 rs6000_initial_elimination_offset (int from, int to)
37175 rs6000_stack_t *info = rs6000_stack_info ();
37176 HOST_WIDE_INT offset;
37178 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37179 offset = info->push_p ? 0 : -info->total_size;
37180 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37182 offset = info->push_p ? 0 : -info->total_size;
37183 if (FRAME_GROWS_DOWNWARD)
37184 offset += info->fixed_size + info->vars_size + info->parm_size;
37186 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37187 offset = FRAME_GROWS_DOWNWARD
37188 ? info->fixed_size + info->vars_size + info->parm_size
37189 : 0;
37190 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
37191 offset = info->total_size;
37192 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
37193 offset = info->push_p ? info->total_size : 0;
37194 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
37195 offset = 0;
37196 else
37197 gcc_unreachable ();
37199 return offset;
37202 static rtx
37203 rs6000_dwarf_register_span (rtx reg)
37205 rtx parts[8];
37206 int i, words;
37207 unsigned regno = REGNO (reg);
37208 machine_mode mode = GET_MODE (reg);
37210 if (TARGET_SPE
37211 && regno < 32
37212 && (SPE_VECTOR_MODE (GET_MODE (reg))
37213 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
37214 && mode != SFmode && mode != SDmode && mode != SCmode)))
37215 ;
37216 else
37217 return NULL_RTX;
37219 regno = REGNO (reg);
37221 /* The duality of the SPE register size wreaks all kinds of havoc.
37222 This is a way of distinguishing r0 in 32-bits from r0 in
37223 64-bits. */
37224 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
37225 gcc_assert (words <= 4);
37226 for (i = 0; i < words; i++, regno++)
37228 if (BYTES_BIG_ENDIAN)
37230 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37231 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
37233 else
37235 parts[2 * i] = gen_rtx_REG (SImode, regno);
37236 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
37240 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
37243 /* Fill in sizes for SPE register high parts in table used by unwinder. */
37245 static void
37246 rs6000_init_dwarf_reg_sizes_extra (tree address)
37248 if (TARGET_SPE)
37250 int i;
37251 machine_mode mode = TYPE_MODE (char_type_node);
37252 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37253 rtx mem = gen_rtx_MEM (BLKmode, addr);
37254 rtx value = gen_int_mode (4, mode);
37256 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
37258 int column = DWARF_REG_TO_UNWIND_COLUMN
37259 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37260 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37262 emit_move_insn (adjust_address (mem, mode, offset), value);
37266 if (TARGET_MACHO && ! TARGET_ALTIVEC)
37268 int i;
37269 machine_mode mode = TYPE_MODE (char_type_node);
37270 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
37271 rtx mem = gen_rtx_MEM (BLKmode, addr);
37272 rtx value = gen_int_mode (16, mode);
37274 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
37275 The unwinder still needs to know the size of Altivec registers. */
37277 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
37279 int column = DWARF_REG_TO_UNWIND_COLUMN
37280 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
37281 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
37283 emit_move_insn (adjust_address (mem, mode, offset), value);
37288 /* Map internal gcc register numbers to debug format register numbers.
37289 FORMAT specifies the type of debug register number to use:
37290 0 -- debug information, except for frame-related sections
37291 1 -- DWARF .debug_frame section
37292 2 -- DWARF .eh_frame section */
37294 unsigned int
37295 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
37297 /* We never use the GCC internal number for SPE high registers.
37298 Those are mapped to the 1200..1231 range for all debug formats. */
37299 if (SPE_HIGH_REGNO_P (regno))
37300 return regno - FIRST_SPE_HIGH_REGNO + 1200;
37302 /* Except for the above, we use the internal number for non-DWARF
37303 debug information, and also for .eh_frame. */
37304 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
37305 return regno;
37307 /* On some platforms, we use the standard DWARF register
37308 numbering for .debug_info and .debug_frame. */
37309 #ifdef RS6000_USE_DWARF_NUMBERING
37310 if (regno <= 63)
37311 return regno;
37312 if (regno == LR_REGNO)
37313 return 108;
37314 if (regno == CTR_REGNO)
37315 return 109;
37316 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
37317 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
37318 The actual code emitted saves the whole of CR, so we map CR2_REGNO
37319 to the DWARF reg for CR. */
37320 if (format == 1 && regno == CR2_REGNO)
37321 return 64;
37322 if (CR_REGNO_P (regno))
37323 return regno - CR0_REGNO + 86;
37324 if (regno == CA_REGNO)
37325 return 101; /* XER */
37326 if (ALTIVEC_REGNO_P (regno))
37327 return regno - FIRST_ALTIVEC_REGNO + 1124;
37328 if (regno == VRSAVE_REGNO)
37329 return 356;
37330 if (regno == VSCR_REGNO)
37331 return 67;
37332 if (regno == SPE_ACC_REGNO)
37333 return 99;
37334 if (regno == SPEFSCR_REGNO)
37335 return 612;
37336 #endif
37337 return regno;
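/* For example, with RS6000_USE_DWARF_NUMBERING defined, the mapping above
   sends LR to 108, CTR to 109, and CR2 to 86 + 2 == 88 for .debug_info,
   but to 64 (the whole CR) for .debug_frame (format == 1), matching the
   prologue's save of the full condition register.  */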
37340 /* Target hook for eh_return_filter_mode.  */
37341 static machine_mode
37342 rs6000_eh_return_filter_mode (void)
37344 return TARGET_32BIT ? SImode : word_mode;
37347 /* Target hook for scalar_mode_supported_p. */
37348 static bool
37349 rs6000_scalar_mode_supported_p (machine_mode mode)
37351 /* -m32 does not support TImode. This is the default, from
37352 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
37353 same ABI as for -m32. But default_scalar_mode_supported_p allows
37354 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
37355 for -mpowerpc64. */
37356 if (TARGET_32BIT && mode == TImode)
37357 return false;
37359 if (DECIMAL_FLOAT_MODE_P (mode))
37360 return default_decimal_float_supported_p ();
37361 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
37362 return true;
37363 else
37364 return default_scalar_mode_supported_p (mode);
37367 /* Target hook for vector_mode_supported_p. */
37368 static bool
37369 rs6000_vector_mode_supported_p (machine_mode mode)
37372 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
37373 return true;
37375 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
37376 return true;
37378 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
37379 128-bit, the compiler might try to widen IEEE 128-bit to IBM
37380 double-double. */
37381 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
37382 return true;
37384 else
37385 return false;
37388 /* Target hook for floatn_mode. */
37389 static machine_mode
37390 rs6000_floatn_mode (int n, bool extended)
37392 if (extended)
37394 switch (n)
37396 case 32:
37397 return DFmode;
37399 case 64:
37400 if (TARGET_FLOAT128_KEYWORD)
37401 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37402 else
37403 return VOIDmode;
37405 case 128:
37406 return VOIDmode;
37408 default:
37409 /* Those are the only valid _FloatNx types. */
37410 gcc_unreachable ();
37413 else
37415 switch (n)
37417 case 32:
37418 return SFmode;
37420 case 64:
37421 return DFmode;
37423 case 128:
37424 if (TARGET_FLOAT128_KEYWORD)
37425 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37426 else
37427 return VOIDmode;
37429 default:
37430 return VOIDmode;
37436 /* Target hook for c_mode_for_suffix. */
37437 static machine_mode
37438 rs6000_c_mode_for_suffix (char suffix)
37440 if (TARGET_FLOAT128_TYPE)
37442 if (suffix == 'q' || suffix == 'Q')
37443 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
37445 /* At the moment, we are not defining a suffix for IBM extended double.
37446 If/when the default for -mabi=ieeelongdouble is changed, and we want
37447 to support __ibm128 constants in legacy library code, we may need to
37448 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
37449 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
37450 __float80 constants. */
37453 return VOIDmode;
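/* So with the __float128 keyword enabled, a literal such as 1.0q (or 1.0Q)
   is given KFmode, or TFmode when long double is already IEEE 128-bit.  */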
37456 /* Target hook for invalid_arg_for_unprototyped_fn. */
37457 static const char *
37458 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
37460 return (!rs6000_darwin64_abi
37461 && typelist == 0
37462 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
37463 && (funcdecl == NULL_TREE
37464 || (TREE_CODE (funcdecl) == FUNCTION_DECL
37465 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
37466 ? N_("AltiVec argument passed to unprototyped function")
37467 : NULL;
37470 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
37471 setup by using __stack_chk_fail_local hidden function instead of
37472 calling __stack_chk_fail directly. Otherwise it is better to call
37473 __stack_chk_fail directly. */
37475 static tree ATTRIBUTE_UNUSED
37476 rs6000_stack_protect_fail (void)
37478 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
37479 ? default_hidden_stack_protect_fail ()
37480 : default_external_stack_protect_fail ();
37483 void
37484 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
37485 int num_operands ATTRIBUTE_UNUSED)
37487 if (rs6000_warn_cell_microcode)
37489 const char *temp;
37490 int insn_code_number = recog_memoized (insn);
37491 location_t location = INSN_LOCATION (insn);
37493 /* Punt on insns we cannot recognize. */
37494 if (insn_code_number < 0)
37495 return;
37497 temp = get_insn_template (insn_code_number, insn);
37499 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
37500 warning_at (location, OPT_mwarn_cell_microcode,
37501 "emitting microcode insn %s\t[%s] #%d",
37502 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37503 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
37504 warning_at (location, OPT_mwarn_cell_microcode,
37505 "emitting conditional microcode insn %s\t[%s] #%d",
37506 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
37510 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
37512 #if TARGET_ELF
37513 static unsigned HOST_WIDE_INT
37514 rs6000_asan_shadow_offset (void)
37516 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
37518 #endif
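/* The value above is the constant term in the usual ASan shadow mapping,
   shadow = (addr >> 3) + offset: 1<<41 for 64-bit ELF targets and 1<<29
   for 32-bit ones.  */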
37520 /* Mask options that we want to support inside attribute((target)) and
37521 #pragma GCC target operations. Note, we do not include things like
37522 64/32-bit, endianness, hard/soft floating point, etc. that would have
37523 different calling sequences. */
37525 struct rs6000_opt_mask {
37526 const char *name; /* option name */
37527 HOST_WIDE_INT mask; /* mask to set */
37528 bool invert; /* invert sense of mask */
37529 bool valid_target; /* option is a target option */
37532 static struct rs6000_opt_mask const rs6000_opt_masks[] =
37534 { "altivec", OPTION_MASK_ALTIVEC, false, true },
37535 { "cmpb", OPTION_MASK_CMPB, false, true },
37536 { "crypto", OPTION_MASK_CRYPTO, false, true },
37537 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
37538 { "dlmzb", OPTION_MASK_DLMZB, false, true },
37539 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
37540 false, true },
37541 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
37542 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
37543 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
37544 { "fprnd", OPTION_MASK_FPRND, false, true },
37545 { "hard-dfp", OPTION_MASK_DFP, false, true },
37546 { "htm", OPTION_MASK_HTM, false, true },
37547 { "isel", OPTION_MASK_ISEL, false, true },
37548 { "mfcrf", OPTION_MASK_MFCRF, false, true },
37549 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
37550 { "modulo", OPTION_MASK_MODULO, false, true },
37551 { "mulhw", OPTION_MASK_MULHW, false, true },
37552 { "multiple", OPTION_MASK_MULTIPLE, false, true },
37553 { "popcntb", OPTION_MASK_POPCNTB, false, true },
37554 { "popcntd", OPTION_MASK_POPCNTD, false, true },
37555 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
37556 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
37557 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
37558 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
37559 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
37560 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
37561 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
37562 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
37563 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
37564 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
37565 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
37566 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
37567 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
37568 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
37569 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
37570 { "string", OPTION_MASK_STRING, false, true },
37571 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
37572 { "update", OPTION_MASK_NO_UPDATE, true , true },
37573 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
37574 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
37575 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
37576 { "vsx", OPTION_MASK_VSX, false, true },
37577 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
37578 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
37579 #ifdef OPTION_MASK_64BIT
37580 #if TARGET_AIX_OS
37581 { "aix64", OPTION_MASK_64BIT, false, false },
37582 { "aix32", OPTION_MASK_64BIT, true, false },
37583 #else
37584 { "64", OPTION_MASK_64BIT, false, false },
37585 { "32", OPTION_MASK_64BIT, true, false },
37586 #endif
37587 #endif
37588 #ifdef OPTION_MASK_EABI
37589 { "eabi", OPTION_MASK_EABI, false, false },
37590 #endif
37591 #ifdef OPTION_MASK_LITTLE_ENDIAN
37592 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
37593 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
37594 #endif
37595 #ifdef OPTION_MASK_RELOCATABLE
37596 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
37597 #endif
37598 #ifdef OPTION_MASK_STRICT_ALIGN
37599 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
37600 #endif
37601 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
37602 { "string", OPTION_MASK_STRING, false, false },
37605 /* Builtin mask mapping for printing the flags. */
37606 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
37608 { "altivec", RS6000_BTM_ALTIVEC, false, false },
37609 { "vsx", RS6000_BTM_VSX, false, false },
37610 { "spe", RS6000_BTM_SPE, false, false },
37611 { "paired", RS6000_BTM_PAIRED, false, false },
37612 { "fre", RS6000_BTM_FRE, false, false },
37613 { "fres", RS6000_BTM_FRES, false, false },
37614 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
37615 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
37616 { "popcntd", RS6000_BTM_POPCNTD, false, false },
37617 { "cell", RS6000_BTM_CELL, false, false },
37618 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
37619 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
37620 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
37621 { "crypto", RS6000_BTM_CRYPTO, false, false },
37622 { "htm", RS6000_BTM_HTM, false, false },
37623 { "hard-dfp", RS6000_BTM_DFP, false, false },
37624 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
37625 { "long-double-128", RS6000_BTM_LDBL128, false, false },
37626 { "float128", RS6000_BTM_FLOAT128, false, false },
37629 /* Option variables that we want to support inside attribute((target)) and
37630 #pragma GCC target operations. */
37632 struct rs6000_opt_var {
37633 const char *name; /* option name */
37634 size_t global_offset; /* offset of the option in global_options. */
37635 size_t target_offset; /* offset of the option in target options. */
37638 static struct rs6000_opt_var const rs6000_opt_vars[] =
37640 { "friz",
37641 offsetof (struct gcc_options, x_TARGET_FRIZ),
37642 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
37643 { "avoid-indexed-addresses",
37644 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
37645 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
37646 { "paired",
37647 offsetof (struct gcc_options, x_rs6000_paired_float),
37648 offsetof (struct cl_target_option, x_rs6000_paired_float), },
37649 { "longcall",
37650 offsetof (struct gcc_options, x_rs6000_default_long_calls),
37651 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
37652 { "optimize-swaps",
37653 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
37654 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
37655 { "allow-movmisalign",
37656 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
37657 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
37658 { "allow-df-permute",
37659 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
37660 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
37661 { "sched-groups",
37662 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
37663 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
37664 { "always-hint",
37665 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
37666 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
37667 { "align-branch-targets",
37668 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
37669 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
37670 { "vectorize-builtins",
37671 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
37672 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
37673 { "tls-markers",
37674 offsetof (struct gcc_options, x_tls_markers),
37675 offsetof (struct cl_target_option, x_tls_markers), },
37676 { "sched-prolog",
37677 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37678 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37679 { "sched-epilog",
37680 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37681 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37682 { "gen-cell-microcode",
37683 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
37684 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
37685 { "warn-cell-microcode",
37686 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
37687 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
37690 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
37691 parsing. Return true if there were no errors. */
37693 static bool
37694 rs6000_inner_target_options (tree args, bool attr_p)
37696 bool ret = true;
37698 if (args == NULL_TREE)
37701 else if (TREE_CODE (args) == STRING_CST)
37703 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37704 char *q;
37706 while ((q = strtok (p, ",")) != NULL)
37708 bool error_p = false;
37709 bool not_valid_p = false;
37710 const char *cpu_opt = NULL;
37712 p = NULL;
37713 if (strncmp (q, "cpu=", 4) == 0)
37715 int cpu_index = rs6000_cpu_name_lookup (q+4);
37716 if (cpu_index >= 0)
37717 rs6000_cpu_index = cpu_index;
37718 else
37720 error_p = true;
37721 cpu_opt = q+4;
37724 else if (strncmp (q, "tune=", 5) == 0)
37726 int tune_index = rs6000_cpu_name_lookup (q+5);
37727 if (tune_index >= 0)
37728 rs6000_tune_index = tune_index;
37729 else
37731 error_p = true;
37732 cpu_opt = q+5;
37735 else
37737 size_t i;
37738 bool invert = false;
37739 char *r = q;
37741 error_p = true;
37742 if (strncmp (r, "no-", 3) == 0)
37744 invert = true;
37745 r += 3;
37748 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
37749 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
37751 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
37753 if (!rs6000_opt_masks[i].valid_target)
37754 not_valid_p = true;
37755 else
37757 error_p = false;
37758 rs6000_isa_flags_explicit |= mask;
37760 /* VSX needs altivec, so -mvsx automagically sets
37761 altivec and disables -mavoid-indexed-addresses. */
37762 if (!invert)
37764 if (mask == OPTION_MASK_VSX)
37766 mask |= OPTION_MASK_ALTIVEC;
37767 TARGET_AVOID_XFORM = 0;
37771 if (rs6000_opt_masks[i].invert)
37772 invert = !invert;
37774 if (invert)
37775 rs6000_isa_flags &= ~mask;
37776 else
37777 rs6000_isa_flags |= mask;
37779 break;
37782 if (error_p && !not_valid_p)
37784 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
37785 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
37787 size_t j = rs6000_opt_vars[i].global_offset;
37788 *((int *) ((char *)&global_options + j)) = !invert;
37789 error_p = false;
37790 not_valid_p = false;
37791 break;
37796 if (error_p)
37798 const char *eprefix, *esuffix;
37800 ret = false;
37801 if (attr_p)
37803 eprefix = "__attribute__((__target__(";
37804 esuffix = ")))";
37806 else
37808 eprefix = "#pragma GCC target ";
37809 esuffix = "";
37812 if (cpu_opt)
37813 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
37814 q, esuffix);
37815 else if (not_valid_p)
37816 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
37817 else
37818 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
37823 else if (TREE_CODE (args) == TREE_LIST)
37827 tree value = TREE_VALUE (args);
37828 if (value)
37830 bool ret2 = rs6000_inner_target_options (value, attr_p);
37831 if (!ret2)
37832 ret = false;
37834 args = TREE_CHAIN (args);
37836 while (args != NULL_TREE);
37839 else
37840 gcc_unreachable ();
37842 return ret;
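/* Typical strings handled above (illustrative only):

     #pragma GCC target ("vsx,no-crypto")
     __attribute__((__target__("cpu=power8,htm")))

   Each comma-separated token is matched first as cpu=/tune=, then against
   rs6000_opt_masks, then against rs6000_opt_vars; a "no-" prefix inverts
   the option.  */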
37845 /* Print out the target options as a list for -mdebug=target. */
37847 static void
37848 rs6000_debug_target_options (tree args, const char *prefix)
37850 if (args == NULL_TREE)
37851 fprintf (stderr, "%s<NULL>", prefix);
37853 else if (TREE_CODE (args) == STRING_CST)
37855 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37856 char *q;
37858 while ((q = strtok (p, ",")) != NULL)
37860 p = NULL;
37861 fprintf (stderr, "%s\"%s\"", prefix, q);
37862 prefix = ", ";
37866 else if (TREE_CODE (args) == TREE_LIST)
37870 tree value = TREE_VALUE (args);
37871 if (value)
37873 rs6000_debug_target_options (value, prefix);
37874 prefix = ", ";
37876 args = TREE_CHAIN (args);
37878 while (args != NULL_TREE);
37881 else
37882 gcc_unreachable ();
37884 return;
37888 /* Hook to validate attribute((target("..."))). */
37890 static bool
37891 rs6000_valid_attribute_p (tree fndecl,
37892 tree ARG_UNUSED (name),
37893 tree args,
37894 int flags)
37896 struct cl_target_option cur_target;
37897 bool ret;
37898 tree old_optimize = build_optimization_node (&global_options);
37899 tree new_target, new_optimize;
37900 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37902 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
37904 if (TARGET_DEBUG_TARGET)
37906 tree tname = DECL_NAME (fndecl);
37907 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
37908 if (tname)
37909 fprintf (stderr, "function: %.*s\n",
37910 (int) IDENTIFIER_LENGTH (tname),
37911 IDENTIFIER_POINTER (tname));
37912 else
37913 fprintf (stderr, "function: unknown\n");
37915 fprintf (stderr, "args:");
37916 rs6000_debug_target_options (args, " ");
37917 fprintf (stderr, "\n");
37919 if (flags)
37920 fprintf (stderr, "flags: 0x%x\n", flags);
37922 fprintf (stderr, "--------------------\n");
37925 old_optimize = build_optimization_node (&global_options);
37926 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37928 /* If the function changed the optimization levels as well as setting target
37929 options, start with the optimizations specified. */
37930 if (func_optimize && func_optimize != old_optimize)
37931 cl_optimization_restore (&global_options,
37932 TREE_OPTIMIZATION (func_optimize));
37934 /* The target attributes may also change some optimization flags, so update
37935 the optimization options if necessary. */
37936 cl_target_option_save (&cur_target, &global_options);
37937 rs6000_cpu_index = rs6000_tune_index = -1;
37938 ret = rs6000_inner_target_options (args, true);
37940 /* Set up any additional state. */
37941 if (ret)
37943 ret = rs6000_option_override_internal (false);
37944 new_target = build_target_option_node (&global_options);
37946 else
37947 new_target = NULL;
37949 new_optimize = build_optimization_node (&global_options);
37951 if (!new_target)
37952 ret = false;
37954 else if (fndecl)
37956 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
37958 if (old_optimize != new_optimize)
37959 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
37962 cl_target_option_restore (&global_options, &cur_target);
37964 if (old_optimize != new_optimize)
37965 cl_optimization_restore (&global_options,
37966 TREE_OPTIMIZATION (old_optimize));
37968 return ret;
37972 /* Hook to validate the current #pragma GCC target and set the state, and
37973 update the macros based on what was changed. If ARGS is NULL, then
37974 POP_TARGET is used to reset the options. */
37976 bool
37977 rs6000_pragma_target_parse (tree args, tree pop_target)
37979 tree prev_tree = build_target_option_node (&global_options);
37980 tree cur_tree;
37981 struct cl_target_option *prev_opt, *cur_opt;
37982 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37983 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37985 if (TARGET_DEBUG_TARGET)
37987 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37988 fprintf (stderr, "args:");
37989 rs6000_debug_target_options (args, " ");
37990 fprintf (stderr, "\n");
37992 if (pop_target)
37994 fprintf (stderr, "pop_target:\n");
37995 debug_tree (pop_target);
37997 else
37998 fprintf (stderr, "pop_target: <NULL>\n");
38000 fprintf (stderr, "--------------------\n");
38003 if (! args)
38005 cur_tree = ((pop_target)
38006 ? pop_target
38007 : target_option_default_node);
38008 cl_target_option_restore (&global_options,
38009 TREE_TARGET_OPTION (cur_tree));
38011 else
38013 rs6000_cpu_index = rs6000_tune_index = -1;
38014 if (!rs6000_inner_target_options (args, false)
38015 || !rs6000_option_override_internal (false)
38016 || (cur_tree = build_target_option_node (&global_options))
38017 == NULL_TREE)
38019 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
38020 fprintf (stderr, "invalid pragma\n");
38022 return false;
38026 target_option_current_node = cur_tree;
38028 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
38029 change the macros that are defined. */
38030 if (rs6000_target_modify_macros_ptr)
38032 prev_opt = TREE_TARGET_OPTION (prev_tree);
38033 prev_bumask = prev_opt->x_rs6000_builtin_mask;
38034 prev_flags = prev_opt->x_rs6000_isa_flags;
38036 cur_opt = TREE_TARGET_OPTION (cur_tree);
38037 cur_flags = cur_opt->x_rs6000_isa_flags;
38038 cur_bumask = cur_opt->x_rs6000_builtin_mask;
38040 diff_bumask = (prev_bumask ^ cur_bumask);
38041 diff_flags = (prev_flags ^ cur_flags);
38043 if ((diff_flags != 0) || (diff_bumask != 0))
38045 /* Delete old macros. */
38046 rs6000_target_modify_macros_ptr (false,
38047 prev_flags & diff_flags,
38048 prev_bumask & diff_bumask);
38050 /* Define new macros. */
38051 rs6000_target_modify_macros_ptr (true,
38052 cur_flags & diff_flags,
38053 cur_bumask & diff_bumask);
38057 return true;
38061 /* Remember the last target of rs6000_set_current_function. */
38062 static GTY(()) tree rs6000_previous_fndecl;
38064 /* Establish appropriate back-end context for processing the function
38065 FNDECL. The argument might be NULL to indicate processing at top
38066 level, outside of any function scope. */
38067 static void
38068 rs6000_set_current_function (tree fndecl)
38070 tree old_tree = (rs6000_previous_fndecl
38071 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
38072 : NULL_TREE);
38074 tree new_tree = (fndecl
38075 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
38076 : NULL_TREE);
38078 if (TARGET_DEBUG_TARGET)
38080 bool print_final = false;
38081 fprintf (stderr, "\n==================== rs6000_set_current_function");
38083 if (fndecl)
38084 fprintf (stderr, ", fndecl %s (%p)",
38085 (DECL_NAME (fndecl)
38086 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
38087 : "<unknown>"), (void *)fndecl);
38089 if (rs6000_previous_fndecl)
38090 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
38092 fprintf (stderr, "\n");
38093 if (new_tree)
38095 fprintf (stderr, "\nnew fndecl target specific options:\n");
38096 debug_tree (new_tree);
38097 print_final = true;
38100 if (old_tree)
38102 fprintf (stderr, "\nold fndecl target specific options:\n");
38103 debug_tree (old_tree);
38104 print_final = true;
38107 if (print_final)
38108 fprintf (stderr, "--------------------\n");
38111 /* Only change the context if the function changes. This hook is called
38112 several times in the course of compiling a function, and we don't want to
38113 slow things down too much or call target_reinit when it isn't safe. */
38114 if (fndecl && fndecl != rs6000_previous_fndecl)
38116 rs6000_previous_fndecl = fndecl;
38117 if (old_tree == new_tree)
38120 else if (new_tree && new_tree != target_option_default_node)
38122 cl_target_option_restore (&global_options,
38123 TREE_TARGET_OPTION (new_tree));
38124 if (TREE_TARGET_GLOBALS (new_tree))
38125 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38126 else
38127 TREE_TARGET_GLOBALS (new_tree)
38128 = save_target_globals_default_opts ();
38131 else if (old_tree && old_tree != target_option_default_node)
38133 new_tree = target_option_current_node;
38134 cl_target_option_restore (&global_options,
38135 TREE_TARGET_OPTION (new_tree));
38136 if (TREE_TARGET_GLOBALS (new_tree))
38137 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
38138 else if (new_tree == target_option_default_node)
38139 restore_target_globals (&default_target_globals);
38140 else
38141 TREE_TARGET_GLOBALS (new_tree)
38142 = save_target_globals_default_opts ();
38148 /* Save the current options */
38150 static void
38151 rs6000_function_specific_save (struct cl_target_option *ptr,
38152 struct gcc_options *opts)
38154 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
38155 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
38158 /* Restore the current options */
38160 static void
38161 rs6000_function_specific_restore (struct gcc_options *opts,
38162 struct cl_target_option *ptr)
38165 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
38166 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
38167 (void) rs6000_option_override_internal (false);
38170 /* Print the current options */
38172 static void
38173 rs6000_function_specific_print (FILE *file, int indent,
38174 struct cl_target_option *ptr)
38176 rs6000_print_isa_options (file, indent, "Isa options set",
38177 ptr->x_rs6000_isa_flags);
38179 rs6000_print_isa_options (file, indent, "Isa options explicit",
38180 ptr->x_rs6000_isa_flags_explicit);
38183 /* Helper function to print the current isa or misc options on a line. */
38185 static void
38186 rs6000_print_options_internal (FILE *file,
38187 int indent,
38188 const char *string,
38189 HOST_WIDE_INT flags,
38190 const char *prefix,
38191 const struct rs6000_opt_mask *opts,
38192 size_t num_elements)
38194 size_t i;
38195 size_t start_column = 0;
38196 size_t cur_column;
38197 size_t max_column = 120;
38198 size_t prefix_len = strlen (prefix);
38199 size_t comma_len = 0;
38200 const char *comma = "";
38202 if (indent)
38203 start_column += fprintf (file, "%*s", indent, "");
38205 if (!flags)
38207 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
38208 return;
38211 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
38213 /* Print the various mask options. */
38214 cur_column = start_column;
38215 for (i = 0; i < num_elements; i++)
38217 bool invert = opts[i].invert;
38218 const char *name = opts[i].name;
38219 const char *no_str = "";
38220 HOST_WIDE_INT mask = opts[i].mask;
38221 size_t len = comma_len + prefix_len + strlen (name);
38223 if (!invert)
38225 if ((flags & mask) == 0)
38227 no_str = "no-";
38228 len += sizeof ("no-") - 1;
38231 flags &= ~mask;
38234 else
38236 if ((flags & mask) != 0)
38238 no_str = "no-";
38239 len += sizeof ("no-") - 1;
38242 flags |= mask;
38245 cur_column += len;
38246 if (cur_column > max_column)
38248 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
38249 cur_column = start_column + len;
38250 comma = "";
38253 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
38254 comma = ", ";
38255 comma_len = sizeof (", ") - 1;
38258 fputs ("\n", file);
38261 /* Helper function to print the current isa options on a line. */
38263 static void
38264 rs6000_print_isa_options (FILE *file, int indent, const char *string,
38265 HOST_WIDE_INT flags)
38267 rs6000_print_options_internal (file, indent, string, flags, "-m",
38268 &rs6000_opt_masks[0],
38269 ARRAY_SIZE (rs6000_opt_masks));
38272 static void
38273 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
38274 HOST_WIDE_INT flags)
38276 rs6000_print_options_internal (file, indent, string, flags, "",
38277 &rs6000_builtin_mask_names[0],
38278 ARRAY_SIZE (rs6000_builtin_mask_names));
38282 /* Hook to determine if one function can safely inline another. */
38284 static bool
38285 rs6000_can_inline_p (tree caller, tree callee)
38287 bool ret = false;
38288 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
38289 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
38291 /* If callee has no option attributes, then it is ok to inline. */
38292 if (!callee_tree)
38293 ret = true;
38295 /* If caller has no option attributes, but callee does then it is not ok to
38296 inline. */
38297 else if (!caller_tree)
38298 ret = false;
38300 else
38302 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
38303 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
38305 /* Callee's options must be a subset of the caller's, i.e. a vsx function
38306 can inline an altivec function but a non-vsx function can't inline a
38307 vsx function. */
38308 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
38309 == callee_opts->x_rs6000_isa_flags)
38310 ret = true;
38313 if (TARGET_DEBUG_TARGET)
38314 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
38315 (DECL_NAME (caller)
38316 ? IDENTIFIER_POINTER (DECL_NAME (caller))
38317 : "<unknown>"),
38318 (DECL_NAME (callee)
38319 ? IDENTIFIER_POINTER (DECL_NAME (callee))
38320 : "<unknown>"),
38321 (ret ? "can" : "cannot"));
38323 return ret;
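/* For example, a caller compiled with -mvsx may inline a callee marked
   __attribute__((target("altivec"))), because -mvsx also enables -maltivec
   and the callee's flags are then a subset of the caller's; the reverse
   inlining is rejected.  */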
38326 /* Allocate a stack temp and fix up the address so it meets the particular
38327 memory requirements (either offsettable or REG+REG addressing).  */
38329 rtx
38330 rs6000_allocate_stack_temp (machine_mode mode,
38331 bool offsettable_p,
38332 bool reg_reg_p)
38334 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
38335 rtx addr = XEXP (stack, 0);
38336 int strict_p = (reload_in_progress || reload_completed);
38338 if (!legitimate_indirect_address_p (addr, strict_p))
38340 if (offsettable_p
38341 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
38342 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38344 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
38345 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
38348 return stack;
38351 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
38352 to such a form to deal with memory reference instructions like STFIWX that
38353 only take reg+reg addressing. */
38355 rtx
38356 rs6000_address_for_fpconvert (rtx x)
38358 int strict_p = (reload_in_progress || reload_completed);
38359 rtx addr;
38361 gcc_assert (MEM_P (x));
38362 addr = XEXP (x, 0);
38363 if (! legitimate_indirect_address_p (addr, strict_p)
38364 && ! legitimate_indexed_address_p (addr, strict_p))
38366 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
38368 rtx reg = XEXP (addr, 0);
38369 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
38370 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
38371 gcc_assert (REG_P (reg));
38372 emit_insn (gen_add3_insn (reg, reg, size_rtx));
38373 addr = reg;
38375 else if (GET_CODE (addr) == PRE_MODIFY)
38377 rtx reg = XEXP (addr, 0);
38378 rtx expr = XEXP (addr, 1);
38379 gcc_assert (REG_P (reg));
38380 gcc_assert (GET_CODE (expr) == PLUS);
38381 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
38382 addr = reg;
38385 x = replace_equiv_address (x, copy_addr_to_reg (addr));
38388 return x;
38391 /* Given a memory reference, if it is not in the form for altivec memory
38392 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
38393 convert to the altivec format. */
38395 rtx
38396 rs6000_address_for_altivec (rtx x)
38398 gcc_assert (MEM_P (x));
38399 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
38401 rtx addr = XEXP (x, 0);
38402 int strict_p = (reload_in_progress || reload_completed);
38404 if (!legitimate_indexed_address_p (addr, strict_p)
38405 && !legitimate_indirect_address_p (addr, strict_p))
38406 addr = copy_to_mode_reg (Pmode, addr);
38408 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
38409 x = change_address (x, GET_MODE (x), addr);
38412 return x;
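/* The AND with -16 clears the low four address bits, mirroring the way the
   lvx/stvx hardware instructions ignore those bits, so the RTL address
   matches what the insn actually accesses.  */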
38415 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
38417 On the RS/6000, all integer constants are acceptable, most won't be valid
38418 for particular insns, though. Only easy FP constants are acceptable. */
38420 static bool
38421 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
38423 if (TARGET_ELF && tls_referenced_p (x))
38424 return false;
38426 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
38427 || GET_MODE (x) == VOIDmode
38428 || (TARGET_POWERPC64 && mode == DImode)
38429 || easy_fp_constant (x, mode)
38430 || easy_vector_constant (x, mode));
38434 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
38436 static bool
38437 chain_already_loaded (rtx_insn *last)
38439 for (; last != NULL; last = PREV_INSN (last))
38441 if (NONJUMP_INSN_P (last))
38443 rtx patt = PATTERN (last);
38445 if (GET_CODE (patt) == SET)
38447 rtx lhs = XEXP (patt, 0);
38449 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
38450 return true;
38454 return false;
38457 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
38459 void
38460 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38462 const bool direct_call_p
38463 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
38464 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
38465 rtx toc_load = NULL_RTX;
38466 rtx toc_restore = NULL_RTX;
38467 rtx func_addr;
38468 rtx abi_reg = NULL_RTX;
38469 rtx call[4];
38470 int n_call;
38471 rtx insn;
38473 /* Handle longcall attributes. */
38474 if (INTVAL (cookie) & CALL_LONG)
38475 func_desc = rs6000_longcall_ref (func_desc);
38477 /* Handle indirect calls. */
38478 if (GET_CODE (func_desc) != SYMBOL_REF
38479 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
38481 /* Save the TOC into its reserved slot before the call,
38482 and prepare to restore it after the call. */
38483 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
38484 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
38485 rtx stack_toc_mem = gen_frame_mem (Pmode,
38486 gen_rtx_PLUS (Pmode, stack_ptr,
38487 stack_toc_offset));
38488 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38489 gen_rtvec (1, stack_toc_offset),
38490 UNSPEC_TOCSLOT);
38491 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38493 /* Can we optimize saving the TOC in the prologue or
38494 do we need to do it at every call? */
38495 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38496 cfun->machine->save_toc_in_prologue = true;
38497 else
38499 MEM_VOLATILE_P (stack_toc_mem) = 1;
38500 emit_move_insn (stack_toc_mem, toc_reg);
38503 if (DEFAULT_ABI == ABI_ELFv2)
38505 /* A function pointer in the ELFv2 ABI is just a plain address, but
38506 the ABI requires it to be loaded into r12 before the call. */
38507 func_addr = gen_rtx_REG (Pmode, 12);
38508 emit_move_insn (func_addr, func_desc);
38509 abi_reg = func_addr;
38511 else
38513 /* A function pointer under AIX is a pointer to a data area whose
38514 first word contains the actual address of the function, whose
38515 second word contains a pointer to its TOC, and whose third word
38516 contains a value to place in the static chain register (r11).
38517 Note that if we load the static chain, our "trampoline" need
38518 not have any executable code. */
38520 /* Load up address of the actual function. */
38521 func_desc = force_reg (Pmode, func_desc);
38522 func_addr = gen_reg_rtx (Pmode);
38523 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38525 /* Prepare to load the TOC of the called function. Note that the
38526 TOC load must happen immediately before the actual call so
38527 that unwinding the TOC registers works correctly. See the
38528 comment in frob_update_context. */
38529 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38530 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38531 gen_rtx_PLUS (Pmode, func_desc,
38532 func_toc_offset));
38533 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38535 /* If we have a static chain, load it up. But, if the call was
38536 originally direct, the 3rd word has not been written since no
38537 trampoline has been built, so we ought not to load it, lest we
38538 override a static chain value. */
38539 if (!direct_call_p
38540 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38541 && !chain_already_loaded (get_current_sequence ()->next->last))
38543 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38544 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38545 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38546 gen_rtx_PLUS (Pmode, func_desc,
38547 func_sc_offset));
38548 emit_move_insn (sc_reg, func_sc_mem);
38549 abi_reg = sc_reg;
38553 else
38555 /* Direct calls use the TOC: for local calls, the callee will
38556 assume the TOC register is set; for non-local calls, the
38557 PLT stub needs the TOC register. */
38558 abi_reg = toc_reg;
38559 func_addr = func_desc;
38562 /* Create the call. */
38563 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38564 if (value != NULL_RTX)
38565 call[0] = gen_rtx_SET (value, call[0]);
38566 n_call = 1;
38568 if (toc_load)
38569 call[n_call++] = toc_load;
38570 if (toc_restore)
38571 call[n_call++] = toc_restore;
38573 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38575 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38576 insn = emit_call_insn (insn);
38578 /* Mention all registers defined by the ABI to hold information
38579 as uses in CALL_INSN_FUNCTION_USAGE. */
38580 if (abi_reg)
38581 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
38584 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38586 void
38587 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38589 rtx call[2];
38590 rtx insn;
38592 gcc_assert (INTVAL (cookie) == 0);
38594 /* Create the call. */
38595 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38596 if (value != NULL_RTX)
38597 call[0] = gen_rtx_SET (value, call[0]);
38599 call[1] = simple_return_rtx;
38601 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38602 insn = emit_call_insn (insn);
38604 /* Note use of the TOC register. */
38605 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38608 /* Return whether we need to always update the saved TOC pointer when we update
38609 the stack pointer. */
38611 static bool
38612 rs6000_save_toc_in_prologue_p (void)
38614 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38617 #ifdef HAVE_GAS_HIDDEN
38618 # define USE_HIDDEN_LINKONCE 1
38619 #else
38620 # define USE_HIDDEN_LINKONCE 0
38621 #endif
38623 /* Fills in the label name that should be used for a 476 link stack thunk. */
38625 void
38626 get_ppc476_thunk_name (char name[32])
38628 gcc_assert (TARGET_LINK_STACK);
38630 if (USE_HIDDEN_LINKONCE)
38631 sprintf (name, "__ppc476.get_thunk");
38632 else
38633 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38636 /* This function emits the simple thunk routine that is used to preserve
38637 the link stack on the 476 cpu. */
38639 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38640 static void
38641 rs6000_code_end (void)
38643 char name[32];
38644 tree decl;
38646 if (!TARGET_LINK_STACK)
38647 return;
38649 get_ppc476_thunk_name (name);
38651 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38652 build_function_type_list (void_type_node, NULL_TREE));
38653 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38654 NULL_TREE, void_type_node);
38655 TREE_PUBLIC (decl) = 1;
38656 TREE_STATIC (decl) = 1;
38658 #if RS6000_WEAK
38659 if (USE_HIDDEN_LINKONCE)
38661 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38662 targetm.asm_out.unique_section (decl, 0);
38663 switch_to_section (get_named_section (decl, NULL, 0));
38664 DECL_WEAK (decl) = 1;
38665 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38666 targetm.asm_out.globalize_label (asm_out_file, name);
38667 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38668 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38670 else
38671 #endif
38673 switch_to_section (text_section);
38674 ASM_OUTPUT_LABEL (asm_out_file, name);
38677 DECL_INITIAL (decl) = make_node (BLOCK);
38678 current_function_decl = decl;
38679 allocate_struct_function (decl, false);
38680 init_function_start (decl);
38681 first_function_block_is_cold = false;
38682 /* Make sure unwind info is emitted for the thunk if needed. */
38683 final_start_function (emit_barrier (), asm_out_file, 1);
38685 fputs ("\tblr\n", asm_out_file);
38687 final_end_function ();
38688 init_insn_lengths ();
38689 free_after_compilation (cfun);
38690 set_cfun (NULL);
38691 current_function_decl = NULL;
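/* The generated thunk is simply a label followed by a blr, schematically

     __ppc476.get_thunk:
	     blr

   (modulo the weak/comdat decoration above); a bl to it followed by
   mflr in the caller retrieves the current address while keeping the
   476 link stack balanced.  */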
38694 /* Add r30 to hard reg set if the prologue sets it up and it is not
38695 pic_offset_table_rtx. */
38697 static void
38698 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38700 if (!TARGET_SINGLE_PIC_BASE
38701 && TARGET_TOC
38702 && TARGET_MINIMAL_TOC
38703 && get_pool_size () != 0)
38704 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38705 if (cfun->machine->split_stack_argp_used)
38706 add_to_hard_reg_set (&set->set, Pmode, 12);
38710 /* Helper function for rs6000_split_logical to emit a logical instruction after
38711 splitting the operation into single GPR registers.
38713 DEST is the destination register.
38714 OP1 and OP2 are the input source registers.
38715 CODE is the base operation (AND, IOR, XOR, NOT).
38716 MODE is the machine mode.
38717 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38718 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38719 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38721 static void
38722 rs6000_split_logical_inner (rtx dest,
38723 rtx op1,
38724 rtx op2,
38725 enum rtx_code code,
38726 machine_mode mode,
38727 bool complement_final_p,
38728 bool complement_op1_p,
38729 bool complement_op2_p)
38731 rtx bool_rtx;
38733 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38734 if (op2 && GET_CODE (op2) == CONST_INT
38735 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38736 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38738 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38739 HOST_WIDE_INT value = INTVAL (op2) & mask;
38741 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38742 if (code == AND)
38744 if (value == 0)
38746 emit_insn (gen_rtx_SET (dest, const0_rtx));
38747 return;
38750 else if (value == mask)
38752 if (!rtx_equal_p (dest, op1))
38753 emit_insn (gen_rtx_SET (dest, op1));
38754 return;
38758 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38759 into separate ORI/ORIS or XORI/XORIS instructions. */
38760 else if (code == IOR || code == XOR)
38762 if (value == 0)
38764 if (!rtx_equal_p (dest, op1))
38765 emit_insn (gen_rtx_SET (dest, op1));
38766 return;
38771 if (code == AND && mode == SImode
38772 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38774 emit_insn (gen_andsi3 (dest, op1, op2));
38775 return;
38778 if (complement_op1_p)
38779 op1 = gen_rtx_NOT (mode, op1);
38781 if (complement_op2_p)
38782 op2 = gen_rtx_NOT (mode, op2);
38784 /* For canonical RTL, if only one arm is inverted it is the first. */
38785 if (!complement_op1_p && complement_op2_p)
38786 std::swap (op1, op2);
38788 bool_rtx = ((code == NOT)
38789 ? gen_rtx_NOT (mode, op1)
38790 : gen_rtx_fmt_ee (code, mode, op1, op2));
38792 if (complement_final_p)
38793 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38795 emit_insn (gen_rtx_SET (dest, bool_rtx));
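/* As a concrete illustration (register numbers are arbitrary), calling
   this with CODE == AND and COMPLEMENT_OP2_P set emits the canonical
   form

     (set (reg:SI 3) (and:SI (not:SI (reg:SI 5)) (reg:SI 4)))

   with the complemented input swapped into the first position, which
   is the shape the andc patterns match.  */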
38798 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38799 operations are split immediately during RTL generation to allow for more
38800 optimizations of the AND/IOR/XOR.
38802 OPERANDS is an array containing the destination and two input operands.
38803 CODE is the base operation (AND, IOR, XOR, NOT).
38804 MODE is the machine mode.
38805 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38806 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38807 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38811 static void
38812 rs6000_split_logical_di (rtx operands[3],
38813 enum rtx_code code,
38814 bool complement_final_p,
38815 bool complement_op1_p,
38816 bool complement_op2_p)
38818 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38819 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38820 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38821 enum hi_lo { hi = 0, lo = 1 };
38822 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38823 size_t i;
38825 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38826 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38827 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38828 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38830 if (code == NOT)
38831 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38832 else
38834 if (GET_CODE (operands[2]) != CONST_INT)
38836 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38837 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38839 else
38841 HOST_WIDE_INT value = INTVAL (operands[2]);
38842 HOST_WIDE_INT value_hi_lo[2];
38844 gcc_assert (!complement_final_p);
38845 gcc_assert (!complement_op1_p);
38846 gcc_assert (!complement_op2_p);
38848 value_hi_lo[hi] = value >> 32;
38849 value_hi_lo[lo] = value & lower_32bits;
38851 for (i = 0; i < 2; i++)
38853 HOST_WIDE_INT sub_value = value_hi_lo[i];
38855 if (sub_value & sign_bit)
38856 sub_value |= upper_32bits;
38858 op2_hi_lo[i] = GEN_INT (sub_value);
38860 /* If this is an AND instruction, check to see if we need to load
38861 the value in a register. */
38862 if (code == AND && sub_value != -1 && sub_value != 0
38863 && !and_operand (op2_hi_lo[i], SImode))
38864 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38869 for (i = 0; i < 2; i++)
38871 /* Split large IOR/XOR operations. */
38872 if ((code == IOR || code == XOR)
38873 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38874 && !complement_final_p
38875 && !complement_op1_p
38876 && !complement_op2_p
38877 && !logical_const_operand (op2_hi_lo[i], SImode))
38879 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38880 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38881 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38882 rtx tmp = gen_reg_rtx (SImode);
38884 /* Make sure the constant is sign extended. */
38885 if ((hi_16bits & sign_bit) != 0)
38886 hi_16bits |= upper_32bits;
38888 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38889 code, SImode, false, false, false);
38891 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38892 code, SImode, false, false, false);
38894 else
38895 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38896 code, SImode, complement_final_p,
38897 complement_op1_p, complement_op2_p);
38900 return;
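/* A worked example (arbitrary registers): on a 32-bit target,

     (set (reg:DI 3) (ior:DI (reg:DI 3) (const_int 0x12345678)))

   is handled word by word.  The constant's high word is zero, so the
   high word of the result is left alone; the low word constant is not
   a valid 16-bit logical immediate, so it is split into the two-insn
   sequence

     oris 4,4,0x1234
     ori  4,4,0x5678

   via the SImode calls to rs6000_split_logical_inner above.  */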
38903 /* Split the insns that make up boolean operations operating on multiple GPR
38904 registers. The boolean MD patterns ensure that the inputs either are
38905 exactly the same as the output registers, or there is no overlap.
38907 OPERANDS is an array containing the destination and two input operands.
38908 CODE is the base operation (AND, IOR, XOR, NOT).
38909 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38910 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38911 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38913 void
38914 rs6000_split_logical (rtx operands[3],
38915 enum rtx_code code,
38916 bool complement_final_p,
38917 bool complement_op1_p,
38918 bool complement_op2_p)
38920 machine_mode mode = GET_MODE (operands[0]);
38921 machine_mode sub_mode;
38922 rtx op0, op1, op2;
38923 int sub_size, regno0, regno1, nregs, i;
38925 /* If this is DImode, use the specialized version that can run before
38926 register allocation. */
38927 if (mode == DImode && !TARGET_POWERPC64)
38929 rs6000_split_logical_di (operands, code, complement_final_p,
38930 complement_op1_p, complement_op2_p);
38931 return;
38934 op0 = operands[0];
38935 op1 = operands[1];
38936 op2 = (code == NOT) ? NULL_RTX : operands[2];
38937 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38938 sub_size = GET_MODE_SIZE (sub_mode);
38939 regno0 = REGNO (op0);
38940 regno1 = REGNO (op1);
38942 gcc_assert (reload_completed);
38943 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38944 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38946 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38947 gcc_assert (nregs > 1);
38949 if (op2 && REG_P (op2))
38950 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38952 for (i = 0; i < nregs; i++)
38954 int offset = i * sub_size;
38955 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38956 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38957 rtx sub_op2 = ((code == NOT)
38958 ? NULL_RTX
38959 : simplify_subreg (sub_mode, op2, mode, offset));
38961 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38962 complement_final_p, complement_op1_p,
38963 complement_op2_p);
38966 return;
38970 /* Return true if the peephole2 can combine an addis instruction and a
38971 D-form load with an offset so that the pair can be fused together on
38972 a power8. */
38974 bool
38975 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38976 rtx addis_value, /* addis value. */
38977 rtx target, /* target register that is loaded. */
38978 rtx mem) /* bottom part of the memory addr. */
38980 rtx addr;
38981 rtx base_reg;
38983 /* Validate arguments. */
38984 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38985 return false;
38987 if (!base_reg_operand (target, GET_MODE (target)))
38988 return false;
38990 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38991 return false;
38993 /* Allow sign/zero extension. */
38994 if (GET_CODE (mem) == ZERO_EXTEND
38995 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38996 mem = XEXP (mem, 0);
38998 if (!MEM_P (mem))
38999 return false;
39001 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
39002 return false;
39004 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39005 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
39006 return false;
39008 /* Validate that the register used to load the high value is either the
39009 register being loaded, or we can safely replace its use.
39011 This function is only called from the peephole2 pass and we assume that
39012 there are 2 instructions in the peephole (addis and load), so we want to
39013 check if the target register was not used in the memory address and the
39014 register to hold the addis result is dead after the peephole. */
39015 if (REGNO (addis_reg) != REGNO (target))
39017 if (reg_mentioned_p (target, mem))
39018 return false;
39020 if (!peep2_reg_dead_p (2, addis_reg))
39021 return false;
39023 /* If the target register being loaded is the stack pointer, we must
39024 avoid loading any other value into it, even temporarily. */
39025 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
39026 return false;
39029 base_reg = XEXP (addr, 0);
39030 return REGNO (addis_reg) == REGNO (base_reg);
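/* Schematically, the two insns in the peephole2 window are

     (set ADDIS_REG ADDIS_VALUE)				; addis
     (set TARGET (mem (plus ADDIS_REG (const_int OFF))))	; D-form load

   (or a lo_sum instead of the plus), and fusion is possible when
   ADDIS_REG either is TARGET itself or is dead after the load, so the
   pair can later be rewritten to funnel through TARGET.  */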
39033 /* During the peephole2 pass, adjust and expand the insns for a load fusion
39034 sequence. We adjust the addis register to use the target register. If the
39035 load sign extends, we adjust the code to do the zero extending load, and an
39036 explicit sign extension later since the fusion only covers zero extending
39037 loads.
39039 The operands are:
39040 operands[0] register set with addis (to be replaced with target)
39041 operands[1] value set via addis
39042 operands[2] target register being loaded
39043 operands[3] D-form memory reference using operands[0]. */
39045 void
39046 expand_fusion_gpr_load (rtx *operands)
39048 rtx addis_value = operands[1];
39049 rtx target = operands[2];
39050 rtx orig_mem = operands[3];
39051 rtx new_addr, new_mem, orig_addr, offset;
39052 enum rtx_code plus_or_lo_sum;
39053 machine_mode target_mode = GET_MODE (target);
39054 machine_mode extend_mode = target_mode;
39055 machine_mode ptr_mode = Pmode;
39056 enum rtx_code extend = UNKNOWN;
39058 if (GET_CODE (orig_mem) == ZERO_EXTEND
39059 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
39061 extend = GET_CODE (orig_mem);
39062 orig_mem = XEXP (orig_mem, 0);
39063 target_mode = GET_MODE (orig_mem);
39066 gcc_assert (MEM_P (orig_mem));
39068 orig_addr = XEXP (orig_mem, 0);
39069 plus_or_lo_sum = GET_CODE (orig_addr);
39070 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39072 offset = XEXP (orig_addr, 1);
39073 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39074 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39076 if (extend != UNKNOWN)
39077 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
39079 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39080 UNSPEC_FUSION_GPR);
39081 emit_insn (gen_rtx_SET (target, new_mem));
39083 if (extend == SIGN_EXTEND)
39085 int sub_off = ((BYTES_BIG_ENDIAN)
39086 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
39087 : 0);
39088 rtx sign_reg
39089 = simplify_subreg (target_mode, target, extend_mode, sub_off);
39091 emit_insn (gen_rtx_SET (target,
39092 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
39095 return;
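/* For example (arbitrary register), a fused sign-extending halfword
   load is emitted as a zero-extending fused load followed by an
   explicit sign extension of the part just loaded, schematically

     (set (reg:DI 10) (unspec:DI [(zero_extend:DI (mem:HI ...))]
				 UNSPEC_FUSION_GPR))
     (set (reg:DI 10) (sign_extend:DI (subreg:HI (reg:DI 10) ...)))

   since the fusion hardware only covers zero-extending loads.  */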
39098 /* Emit the addis instruction that will be part of a fused instruction
39099 sequence. */
39101 void
39102 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
39103 const char *mode_name)
39105 rtx fuse_ops[10];
39106 char insn_template[80];
39107 const char *addis_str = NULL;
39108 const char *comment_str = ASM_COMMENT_START;
39110 if (*comment_str == ' ')
39111 comment_str++;
39113 /* Emit the addis instruction. */
39114 fuse_ops[0] = target;
39115 if (satisfies_constraint_L (addis_value))
39117 fuse_ops[1] = addis_value;
39118 addis_str = "lis %0,%v1";
39121 else if (GET_CODE (addis_value) == PLUS)
39123 rtx op0 = XEXP (addis_value, 0);
39124 rtx op1 = XEXP (addis_value, 1);
39126 if (REG_P (op0) && CONST_INT_P (op1)
39127 && satisfies_constraint_L (op1))
39129 fuse_ops[1] = op0;
39130 fuse_ops[2] = op1;
39131 addis_str = "addis %0,%1,%v2";
39135 else if (GET_CODE (addis_value) == HIGH)
39137 rtx value = XEXP (addis_value, 0);
39138 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
39140 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
39141 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
39142 if (TARGET_ELF)
39143 addis_str = "addis %0,%2,%1@toc@ha";
39145 else if (TARGET_XCOFF)
39146 addis_str = "addis %0,%1@u(%2)";
39148 else
39149 gcc_unreachable ();
39152 else if (GET_CODE (value) == PLUS)
39154 rtx op0 = XEXP (value, 0);
39155 rtx op1 = XEXP (value, 1);
39157 if (GET_CODE (op0) == UNSPEC
39158 && XINT (op0, 1) == UNSPEC_TOCREL
39159 && CONST_INT_P (op1))
39161 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
39162 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
39163 fuse_ops[3] = op1;
39164 if (TARGET_ELF)
39165 addis_str = "addis %0,%2,%1+%3@toc@ha";
39167 else if (TARGET_XCOFF)
39168 addis_str = "addis %0,%1+%3@u(%2)";
39170 else
39171 gcc_unreachable ();
39175 else if (satisfies_constraint_L (value))
39177 fuse_ops[1] = value;
39178 addis_str = "lis %0,%v1";
39181 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
39183 fuse_ops[1] = value;
39184 addis_str = "lis %0,%1@ha";
39188 if (!addis_str)
39189 fatal_insn ("Could not generate addis value for fusion", addis_value);
39191 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
39192 comment, mode_name);
39193 output_asm_insn (insn_template, fuse_ops);
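/* On ELF, for a TOC-relative high part this prints something like

     addis 9,2,sym@toc@ha		# gpr load fusion, type int

   (arbitrary target register; register 2 is the TOC pointer), with the
   trailing comment built from COMMENT and MODE_NAME.  */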
39196 /* Emit a D-form load or store instruction that is the second instruction
39197 of a fusion sequence. */
39199 void
39200 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
39201 const char *insn_str)
39203 rtx fuse_ops[10];
39204 char insn_template[80];
39206 fuse_ops[0] = load_store_reg;
39207 fuse_ops[1] = addis_reg;
39209 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
39211 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
39212 fuse_ops[2] = offset;
39213 output_asm_insn (insn_template, fuse_ops);
39216 else if (GET_CODE (offset) == UNSPEC
39217 && XINT (offset, 1) == UNSPEC_TOCREL)
39219 if (TARGET_ELF)
39220 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
39222 else if (TARGET_XCOFF)
39223 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39225 else
39226 gcc_unreachable ();
39228 fuse_ops[2] = XVECEXP (offset, 0, 0);
39229 output_asm_insn (insn_template, fuse_ops);
39232 else if (GET_CODE (offset) == PLUS
39233 && GET_CODE (XEXP (offset, 0)) == UNSPEC
39234 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
39235 && CONST_INT_P (XEXP (offset, 1)))
39237 rtx tocrel_unspec = XEXP (offset, 0);
39238 if (TARGET_ELF)
39239 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
39241 else if (TARGET_XCOFF)
39242 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
39244 else
39245 gcc_unreachable ();
39247 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
39248 fuse_ops[3] = XEXP (offset, 1);
39249 output_asm_insn (insn_template, fuse_ops);
39252 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
39254 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
39256 fuse_ops[2] = offset;
39257 output_asm_insn (insn_template, fuse_ops);
39260 else
39261 fatal_insn ("Unable to generate load/store offset for fusion", offset);
39263 return;
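/* Continuing the example above, the second half of the fused pair on
   ELF is printed as

     lwz 10,sym@toc@l(9)

   substituting the caller's INSN_STR ("lwz" here) and the low part of
   the split address.  */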
39266 /* Wrap a TOC address that can be fused to indicate that special fusion
39267 processing is needed. */
39269 static rtx
39270 fusion_wrap_memory_address (rtx old_mem)
39272 rtx old_addr = XEXP (old_mem, 0);
39273 rtvec v = gen_rtvec (1, old_addr);
39274 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
39275 return replace_equiv_address_nv (old_mem, new_addr, false);
39278 /* Given an address, convert it into the addis and load offset parts. Addresses
39279 created during the peephole2 process look like:
39280 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
39281 (unspec [(...)] UNSPEC_TOCREL))
39283 Addresses created via toc fusion look like:
39284 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */
39286 static void
39287 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
39289 rtx hi, lo;
39291 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
39293 lo = XVECEXP (addr, 0, 0);
39294 hi = gen_rtx_HIGH (Pmode, lo);
39296 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
39298 hi = XEXP (addr, 0);
39299 lo = XEXP (addr, 1);
39301 else
39302 gcc_unreachable ();
39304 *p_hi = hi;
39305 *p_lo = lo;
39308 /* Return a string to fuse an addis instruction with a gpr load into the
39309 same register that the addis instruction set. The address that is used
39310 is the logical address that was formed during peephole2:
39311 (lo_sum (high) (low-part))
39313 Or the address is the TOC address that is wrapped before register allocation:
39314 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
39316 The code is complicated, so we call output_asm_insn directly, and just
39317 return "". */
39319 const char *
39320 emit_fusion_gpr_load (rtx target, rtx mem)
39322 rtx addis_value;
39323 rtx addr;
39324 rtx load_offset;
39325 const char *load_str = NULL;
39326 const char *mode_name = NULL;
39327 machine_mode mode;
39329 if (GET_CODE (mem) == ZERO_EXTEND)
39330 mem = XEXP (mem, 0);
39332 gcc_assert (REG_P (target) && MEM_P (mem));
39334 addr = XEXP (mem, 0);
39335 fusion_split_address (addr, &addis_value, &load_offset);
39337 /* Now emit the load instruction to the same register. */
39338 mode = GET_MODE (mem);
39339 switch (mode)
39341 case QImode:
39342 mode_name = "char";
39343 load_str = "lbz";
39344 break;
39346 case HImode:
39347 mode_name = "short";
39348 load_str = "lhz";
39349 break;
39351 case SImode:
39352 case SFmode:
39353 mode_name = (mode == SFmode) ? "float" : "int";
39354 load_str = "lwz";
39355 break;
39357 case DImode:
39358 case DFmode:
39359 gcc_assert (TARGET_POWERPC64);
39360 mode_name = (mode == DFmode) ? "double" : "long";
39361 load_str = "ld";
39362 break;
39364 default:
39365 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
39368 /* Emit the addis instruction. */
39369 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
39371 /* Emit the D-form load instruction. */
39372 emit_fusion_load_store (target, target, load_offset, load_str);
39374 return "";
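/* Putting the two halves together, a fused GPR load of an int from the
   TOC is printed as the adjacent pair (arbitrary register)

     addis 9,2,sym@toc@ha	# gpr load fusion, type int
     lwz 9,sym@toc@l(9)

   with both instructions targeting the same register, as power8 fusion
   requires.  */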
39378 /* Return true if the peephole2 can combine a load/store involving a
39379 combination of an addis instruction and the memory operation. This was
39380 added to the ISA 3.0 (power9) hardware. */
39382 bool
39383 fusion_p9_p (rtx addis_reg, /* register set via addis. */
39384 rtx addis_value, /* addis value. */
39385 rtx dest, /* destination (memory or register). */
39386 rtx src) /* source (register or memory). */
39388 rtx addr, mem, offset;
39389 enum machine_mode mode = GET_MODE (src);
39391 /* Validate arguments. */
39392 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
39393 return false;
39395 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
39396 return false;
39398 /* Ignore extend operations that are part of the load. */
39399 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
39400 src = XEXP (src, 0);
39402 /* Test for memory<-register or register<-memory. */
39403 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
39405 if (!MEM_P (dest))
39406 return false;
39408 mem = dest;
39411 else if (MEM_P (src))
39413 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
39414 return false;
39416 mem = src;
39419 else
39420 return false;
39422 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
39423 if (GET_CODE (addr) == PLUS)
39425 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39426 return false;
39428 return satisfies_constraint_I (XEXP (addr, 1));
39431 else if (GET_CODE (addr) == LO_SUM)
39433 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
39434 return false;
39436 offset = XEXP (addr, 1);
39437 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
39438 return small_toc_ref (offset, GET_MODE (offset));
39440 else if (TARGET_ELF && !TARGET_POWERPC64)
39441 return CONSTANT_P (offset);
39444 return false;
39447 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39448 load sequence.
39450 The operands are:
39451 operands[0] register set with addis
39452 operands[1] value set via addis
39453 operands[2] target register being loaded
39454 operands[3] D-form memory reference using operands[0].
39456 This is similar to the fusion introduced with power8, except it extends to
39457 both loads and stores and does not require the result register to be the
39458 same as the base register. At the moment, we only do this if the register
39459 set by the addis is dead. */
39461 void
39462 expand_fusion_p9_load (rtx *operands)
39464 rtx tmp_reg = operands[0];
39465 rtx addis_value = operands[1];
39466 rtx target = operands[2];
39467 rtx orig_mem = operands[3];
39468 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
39469 enum rtx_code plus_or_lo_sum;
39470 machine_mode target_mode = GET_MODE (target);
39471 machine_mode extend_mode = target_mode;
39472 machine_mode ptr_mode = Pmode;
39473 enum rtx_code extend = UNKNOWN;
39475 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
39477 extend = GET_CODE (orig_mem);
39478 orig_mem = XEXP (orig_mem, 0);
39479 target_mode = GET_MODE (orig_mem);
39482 gcc_assert (MEM_P (orig_mem));
39484 orig_addr = XEXP (orig_mem, 0);
39485 plus_or_lo_sum = GET_CODE (orig_addr);
39486 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39488 offset = XEXP (orig_addr, 1);
39489 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39490 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39492 if (extend != UNKNOWN)
39493 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
39495 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39496 UNSPEC_FUSION_P9);
39498 set = gen_rtx_SET (target, new_mem);
39499 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39500 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39501 emit_insn (insn);
39503 return;
39506 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39507 store sequence.
39509 The operands are:
39510 operands[0] register set with addis
39511 operands[1] value set via addis
39512 operands[2] target D-form memory being stored to
39513 operands[3] register being stored
39515 This is similar to the fusion introduced with power8, except it extends to
39516 both loads and stores and does not require the result register to be the
39517 same as the base register. At the moment, we only do this if the register
39518 set by the addis is dead. */
39520 void
39521 expand_fusion_p9_store (rtx *operands)
39523 rtx tmp_reg = operands[0];
39524 rtx addis_value = operands[1];
39525 rtx orig_mem = operands[2];
39526 rtx src = operands[3];
39527 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39528 enum rtx_code plus_or_lo_sum;
39529 machine_mode target_mode = GET_MODE (orig_mem);
39530 machine_mode ptr_mode = Pmode;
39532 gcc_assert (MEM_P (orig_mem));
39534 orig_addr = XEXP (orig_mem, 0);
39535 plus_or_lo_sum = GET_CODE (orig_addr);
39536 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39538 offset = XEXP (orig_addr, 1);
39539 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39540 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39542 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39543 UNSPEC_FUSION_P9);
39545 set = gen_rtx_SET (new_mem, new_src);
39546 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39547 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39548 emit_insn (insn);
39550 return;
39553 /* Return a string to fuse an addis instruction with a load using extended
39554 fusion. The address that is used is the logical address that was formed
39555 during peephole2: (lo_sum (high) (low-part))
39557 The code is complicated, so we call output_asm_insn directly, and just
39558 return "". */
39560 const char *
39561 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39563 enum machine_mode mode = GET_MODE (reg);
39564 rtx hi;
39565 rtx lo;
39566 rtx addr;
39567 const char *load_string;
39568 int r;
39570 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39572 mem = XEXP (mem, 0);
39573 mode = GET_MODE (mem);
39576 if (GET_CODE (reg) == SUBREG)
39578 gcc_assert (SUBREG_BYTE (reg) == 0);
39579 reg = SUBREG_REG (reg);
39582 if (!REG_P (reg))
39583 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39585 r = REGNO (reg);
39586 if (FP_REGNO_P (r))
39588 if (mode == SFmode)
39589 load_string = "lfs";
39590 else if (mode == DFmode || mode == DImode)
39591 load_string = "lfd";
39592 else
39593 gcc_unreachable ();
39595 else if (INT_REGNO_P (r))
39597 switch (mode)
39599 case QImode:
39600 load_string = "lbz";
39601 break;
39602 case HImode:
39603 load_string = "lhz";
39604 break;
39605 case SImode:
39606 case SFmode:
39607 load_string = "lwz";
39608 break;
39609 case DImode:
39610 case DFmode:
39611 if (!TARGET_POWERPC64)
39612 gcc_unreachable ();
39613 load_string = "ld";
39614 break;
39615 default:
39616 gcc_unreachable ();
39619 else
39620 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39622 if (!MEM_P (mem))
39623 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39625 addr = XEXP (mem, 0);
39626 fusion_split_address (addr, &hi, &lo);
39628 /* Emit the addis instruction. */
39629 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
39631 /* Emit the D-form load instruction. */
39632 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39634 return "";
39637 /* Return a string to fuse an addis instruction with a store using extended
39638 fusion. The address that is used is the logical address that was formed
39639 during peephole2: (lo_sum (high) (low-part))
39641 The code is complicated, so we call output_asm_insn directly, and just
39642 return "". */
39644 const char *
39645 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39647 enum machine_mode mode = GET_MODE (reg);
39648 rtx hi;
39649 rtx lo;
39650 rtx addr;
39651 const char *store_string;
39652 int r;
39654 if (GET_CODE (reg) == SUBREG)
39656 gcc_assert (SUBREG_BYTE (reg) == 0);
39657 reg = SUBREG_REG (reg);
39660 if (!REG_P (reg))
39661 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39663 r = REGNO (reg);
39664 if (FP_REGNO_P (r))
39666 if (mode == SFmode)
39667 store_string = "stfs";
39668 else if (mode == DFmode)
39669 store_string = "stfd";
39670 else
39671 gcc_unreachable ();
39673 else if (INT_REGNO_P (r))
39675 switch (mode)
39677 case QImode:
39678 store_string = "stb";
39679 break;
39680 case HImode:
39681 store_string = "sth";
39682 break;
39683 case SImode:
39684 case SFmode:
39685 store_string = "stw";
39686 break;
39687 case DImode:
39688 case DFmode:
39689 if (!TARGET_POWERPC64)
39690 gcc_unreachable ();
39691 store_string = "std";
39692 break;
39693 default:
39694 gcc_unreachable ();
39697 else
39698 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39700 if (!MEM_P (mem))
39701 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39703 addr = XEXP (mem, 0);
39704 fusion_split_address (addr, &hi, &lo);
39706 /* Emit the addis instruction. */
39707 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39709 /* Emit the D-form store instruction. */
39710 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39712 return "";
39716 /* Analyze vector computations and remove unnecessary doubleword
39717 swaps (xxswapdi instructions). This pass is performed only
39718 for little-endian VSX code generation.
39720 For this specific case, loads and stores of 4x32 and 2x64 vectors
39721 are inefficient. These are implemented using the lvxd2x and
39722 stvxd2x instructions, which invert the order of doublewords in
39723 a vector register. Thus the code generation inserts an xxswapdi
39724 after each such load, and prior to each such store. (For spill
39725 code after register assignment, an additional xxswapdi is inserted
39726 following each store in order to return a hard register to its
39727 unpermuted value.)
39729 The extra xxswapdi instructions reduce performance. This can be
39730 particularly bad for vectorized code. The purpose of this pass
39731 is to reduce the number of xxswapdi instructions required for
39732 correctness.
39734 The primary insight is that much code that operates on vectors
39735 does not care about the relative order of elements in a register,
39736 so long as the correct memory order is preserved. If we have
39737 a computation where all input values are provided by lvxd2x/xxswapdi
39738 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
39739 and all intermediate computations are pure SIMD (independent of
39740 element order), then all the xxswapdi's associated with the loads
39741 and stores may be removed.
39743 This pass uses some of the infrastructure and logical ideas from
39744 the "web" pass in web.c. We create maximal webs of computations
39745 fitting the description above using union-find. Each such web is
39746 then optimized by removing its unnecessary xxswapdi instructions.
39748 The pass is placed prior to global optimization so that we can
39749 perform the optimization in the safest and simplest way possible;
39750 that is, by replacing each xxswapdi insn with a register copy insn.
39751 Subsequent forward propagation will remove copies where possible.
39753 There are some operations sensitive to element order for which we
39754 can still allow the operation, provided we modify those operations.
39755 These include CONST_VECTORs, for which we must swap the first and
39756 second halves of the constant vector; and SUBREGs, for which we
39757 must adjust the byte offset to account for the swapped doublewords.
39758 A remaining opportunity would be non-immediate-form splats, for
39759 which we should adjust the selected lane of the input. We should
39760 also make code generation adjustments for sum-across operations,
39761 since this is a common vectorizer reduction.
39763 Because we run prior to the first split, we can see loads and stores
39764 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39765 vector loads and stores that have not yet been split into a permuting
39766 load/store and a swap. (One way this can happen is with a builtin
39767 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39768 than deleting a swap, we convert the load/store into a permuting
39769 load/store (which effectively removes the swap). */
39771 /* Notes on Permutes
39773 We do not currently handle computations that contain permutes. There
39774 is a general transformation that can be performed correctly, but it
39775 may introduce more expensive code than it replaces. To handle these
39776 would require a cost model to determine when to perform the optimization.
39777 This commentary records how this could be done if desired.
39779 The most general permute is something like this (example for V16QI):
39781 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39782 (parallel [(const_int a0) (const_int a1)
39784 (const_int a14) (const_int a15)]))
39786 where a0,...,a15 are in [0,31] and select elements from op1 and op2
39787 to produce the result.
39789 Regardless of mode, we can convert the PARALLEL to a mask of 16
39790 byte-element selectors. Let's call this M, with M[i] representing
39791 the ith byte-element selector value. Then if we swap doublewords
39792 throughout the computation, we can get correct behavior by replacing
39793 M with M' as follows:
39795 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39796 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39798 This seems promising at first, since we are just replacing one mask
39799 with another. But certain masks are preferable to others. If M
39800 is a mask that matches a vmrghh pattern, for example, M' certainly
39801 will not. Instead of a single vmrghh, we would generate a load of
39802 M' and a vperm. So we would need to know how many xxswapd's we can
39803 remove as a result of this transformation to determine if it's
39804 profitable; and preferably the logic would need to be aware of all
39805 the special preferable masks.
39807 Another form of permute is an UNSPEC_VPERM, in which the mask is
39808 already in a register. In some cases, this mask may be a constant
39809 that we can discover with ud-chains, in which case the above
39810 transformation is ok. However, the common usage here is for the
39811 mask to be produced by an UNSPEC_LVSL, in which case the mask
39812 cannot be known at compile time. In such a case we would have to
39813 generate several instructions to compute M' as above at run time,
39814 and a cost model is needed again.
39816 However, when the mask M for an UNSPEC_VPERM is loaded from the
39817 constant pool, we can replace M with M' as above at no cost
39818 beyond adding a constant pool entry. */
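/* Note that the M to M' byte-selector mapping described above can be
   computed branch-free for m in [0,31] as

     m_prime = ((m + 8) & 15) | (m & 16);

   since adding 8 modulo 16 swaps the doublewords within one input
   vector while bit 4, which selects between op1 and op2, is
   preserved.  */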
39820 /* This is based on the union-find logic in web.c. web_entry_base is
39821 defined in df.h. */
39822 class swap_web_entry : public web_entry_base
39824 public:
39825 /* Pointer to the insn. */
39826 rtx_insn *insn;
39827 /* Set if insn contains a mention of a vector register. All other
39828 fields are undefined if this field is unset. */
39829 unsigned int is_relevant : 1;
39830 /* Set if insn is a load. */
39831 unsigned int is_load : 1;
39832 /* Set if insn is a store. */
39833 unsigned int is_store : 1;
39834 /* Set if insn is a doubleword swap. This can either be a register swap
39835 or a permuting load or store (test is_load and is_store for this). */
39836 unsigned int is_swap : 1;
39837 /* Set if the insn has a live-in use of a parameter register. */
39838 unsigned int is_live_in : 1;
39839 /* Set if the insn has a live-out def of a return register. */
39840 unsigned int is_live_out : 1;
39841 /* Set if the insn contains a subreg reference of a vector register. */
39842 unsigned int contains_subreg : 1;
39843 /* Set if the insn contains a 128-bit integer operand. */
39844 unsigned int is_128_int : 1;
39845 /* Set if this is a call-insn. */
39846 unsigned int is_call : 1;
39847 /* Set if this insn does not perform a vector operation for which
39848 element order matters, or if we know how to fix it up if it does.
39849 Undefined if is_swap is set. */
39850 unsigned int is_swappable : 1;
39851 /* A nonzero value indicates what kind of special handling for this
39852 insn is required if doublewords are swapped. Undefined if
39853 is_swappable is not set. */
39854 unsigned int special_handling : 4;
39855 /* Set if the web represented by this entry cannot be optimized. */
39856 unsigned int web_not_optimizable : 1;
39857 /* Set if this insn should be deleted. */
39858 unsigned int will_delete : 1;
39861 enum special_handling_values {
39862 SH_NONE = 0,
39863 SH_CONST_VECTOR,
39864 SH_SUBREG,
39865 SH_NOSWAP_LD,
39866 SH_NOSWAP_ST,
39867 SH_EXTRACT,
39868 SH_SPLAT,
39869 SH_XXPERMDI,
39870 SH_CONCAT,
39871 SH_VPERM
39874 /* Union INSN with all insns containing definitions that reach USE.
39875 Detect whether USE is live-in to the current function. */
39876 static void
39877 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
39879 struct df_link *link = DF_REF_CHAIN (use);
39881 if (!link)
39882 insn_entry[INSN_UID (insn)].is_live_in = 1;
39884 while (link)
39886 if (DF_REF_IS_ARTIFICIAL (link->ref))
39887 insn_entry[INSN_UID (insn)].is_live_in = 1;
39889 if (DF_REF_INSN_INFO (link->ref))
39891 rtx def_insn = DF_REF_INSN (link->ref);
39892 (void)unionfind_union (insn_entry + INSN_UID (insn),
39893 insn_entry + INSN_UID (def_insn));
39896 link = link->next;
39900 /* Union INSN with all insns containing uses reached from DEF.
39901 Detect whether DEF is live-out from the current function. */
39902 static void
39903 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
39905 struct df_link *link = DF_REF_CHAIN (def);
39907 if (!link)
39908 insn_entry[INSN_UID (insn)].is_live_out = 1;
39910 while (link)
39912 /* This could be an eh use or some other artificial use;
39913 we treat these all the same (killing the optimization). */
39914 if (DF_REF_IS_ARTIFICIAL (link->ref))
39915 insn_entry[INSN_UID (insn)].is_live_out = 1;
39917 if (DF_REF_INSN_INFO (link->ref))
39919 rtx use_insn = DF_REF_INSN (link->ref);
39920 (void)unionfind_union (insn_entry + INSN_UID (insn),
39921 insn_entry + INSN_UID (use_insn));
39924 link = link->next;
39928 /* Return 1 iff INSN is a load insn, including permuting loads that
39929 represent an lvxd2x instruction; else return 0. */
39930 static unsigned int
39931 insn_is_load_p (rtx insn)
39933 rtx body = PATTERN (insn);
39935 if (GET_CODE (body) == SET)
39937 if (GET_CODE (SET_SRC (body)) == MEM)
39938 return 1;
39940 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
39941 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
39942 return 1;
39944 return 0;
39947 if (GET_CODE (body) != PARALLEL)
39948 return 0;
39950 rtx set = XVECEXP (body, 0, 0);
39952 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
39953 return 1;
39955 return 0;
39958 /* Return 1 iff INSN is a store insn, including permuting stores that
39959 represent an stvxd2x instruction; else return 0. */
39960 static unsigned int
39961 insn_is_store_p (rtx insn)
39963 rtx body = PATTERN (insn);
39964 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
39965 return 1;
39966 if (GET_CODE (body) != PARALLEL)
39967 return 0;
39968 rtx set = XVECEXP (body, 0, 0);
39969 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
39970 return 1;
39971 return 0;
39974 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39975 a permuting load, or a permuting store. */
39976 static unsigned int
39977 insn_is_swap_p (rtx insn)
39979 rtx body = PATTERN (insn);
39980 if (GET_CODE (body) != SET)
39981 return 0;
39982 rtx rhs = SET_SRC (body);
39983 if (GET_CODE (rhs) != VEC_SELECT)
39984 return 0;
39985 rtx parallel = XEXP (rhs, 1);
39986 if (GET_CODE (parallel) != PARALLEL)
39987 return 0;
39988 unsigned int len = XVECLEN (parallel, 0);
39989 if (len != 2 && len != 4 && len != 8 && len != 16)
39990 return 0;
39991 for (unsigned int i = 0; i < len / 2; ++i)
39993 rtx op = XVECEXP (parallel, 0, i);
39994 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
39995 return 0;
39997 for (unsigned int i = len / 2; i < len; ++i)
39999 rtx op = XVECEXP (parallel, 0, i);
40000 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
40001 return 0;
40003 return 1;
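/* For V4SI, for example, the doubleword swap recognized above has the
   form

     (set (reg:V4SI 0) (vec_select:V4SI (reg:V4SI 1)
					(parallel [(const_int 2)
						   (const_int 3)
						   (const_int 0)
						   (const_int 1)])))

   i.e. the selector exchanges the two doubleword halves.  */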
40006 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
40007 static bool
40008 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
40010 unsigned uid = INSN_UID (insn);
40011 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
40012 return false;
40014 /* Find the unique use in the swap and locate its def. If the def
40015 isn't unique, punt. */
40016 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40017 df_ref use;
40018 FOR_EACH_INSN_INFO_USE (use, insn_info)
40020 struct df_link *def_link = DF_REF_CHAIN (use);
40021 if (!def_link || def_link->next)
40022 return false;
40024 rtx def_insn = DF_REF_INSN (def_link->ref);
40025 unsigned uid2 = INSN_UID (def_insn);
40026 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
40027 return false;
40029 rtx body = PATTERN (def_insn);
40030 if (GET_CODE (body) != SET
40031 || GET_CODE (SET_SRC (body)) != VEC_SELECT
40032 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
40033 return false;
40035 rtx mem = XEXP (SET_SRC (body), 0);
40036 rtx base_reg = XEXP (mem, 0);
40038 df_ref base_use;
40039 insn_info = DF_INSN_INFO_GET (def_insn);
40040 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40042 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40043 continue;
40045 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40046 if (!base_def_link || base_def_link->next)
40047 return false;
40049 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
40050 rtx tocrel_body = PATTERN (tocrel_insn);
40051 rtx base, offset;
40052 if (GET_CODE (tocrel_body) != SET)
40053 return false;
40054 /* There is an extra level of indirection for small/large
40055 code models. */
40056 rtx tocrel_expr = SET_SRC (tocrel_body);
40057 if (GET_CODE (tocrel_expr) == MEM)
40058 tocrel_expr = XEXP (tocrel_expr, 0);
40059 if (!toc_relative_expr_p (tocrel_expr, false))
40060 return false;
40061 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40062 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
40063 return false;
40066 return true;
40069 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
40070 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
40071 static bool
40072 v2df_reduction_p (rtx op)
40074 if (GET_MODE (op) != V2DFmode)
40075 return false;
40077 enum rtx_code code = GET_CODE (op);
40078 if (code != PLUS && code != SMIN && code != SMAX)
40079 return false;
40081 rtx concat = XEXP (op, 0);
40082 if (GET_CODE (concat) != VEC_CONCAT)
40083 return false;
40085 rtx select0 = XEXP (concat, 0);
40086 rtx select1 = XEXP (concat, 1);
40087 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
40088 return false;
40090 rtx reg0 = XEXP (select0, 0);
40091 rtx reg1 = XEXP (select1, 0);
40092 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
40093 return false;
40095 rtx parallel0 = XEXP (select0, 1);
40096 rtx parallel1 = XEXP (select1, 1);
40097 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
40098 return false;
40100 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
40101 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
40102 return false;
40104 return true;
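/* The shape accepted here is, schematically (x the same register in
   both selects),

     (plus:V2DF (vec_concat:V2DF
		  (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
		  (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
		...)

   with SMIN/SMAX also accepted in place of PLUS; only the first
   operand of the outer operation is inspected.  */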
40107 /* Return 1 iff OP is an operand that will not be affected by having
40108 vector doublewords swapped in memory. */
40109 static unsigned int
40110 rtx_is_swappable_p (rtx op, unsigned int *special)
40112 enum rtx_code code = GET_CODE (op);
40113 int i, j;
40114 rtx parallel;
40116 switch (code)
40118 case LABEL_REF:
40119 case SYMBOL_REF:
40120 case CLOBBER:
40121 case REG:
40122 return 1;
40124 case VEC_CONCAT:
40125 case ASM_INPUT:
40126 case ASM_OPERANDS:
40127 return 0;
40129 case CONST_VECTOR:
40131 *special = SH_CONST_VECTOR;
40132 return 1;
40135 case VEC_DUPLICATE:
40136 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
40137 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
40138 it represents a vector splat for which we can do special
40139 handling. */
40140 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
40141 return 1;
40142 else if (REG_P (XEXP (op, 0))
40143 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40144 /* This catches V2DF and V2DI splat, at a minimum. */
40145 return 1;
40146 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
40147 && REG_P (XEXP (XEXP (op, 0), 0))
40148 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
40149 /* This catches splat of a truncated value. */
40150 return 1;
40151 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
40152 /* If the duplicated item is from a select, defer to the select
40153 processing to see if we can change the lane for the splat. */
40154 return rtx_is_swappable_p (XEXP (op, 0), special);
40155 else
40156 return 0;
40158 case VEC_SELECT:
40159 /* A vec_extract operation is ok if we change the lane. */
40160 if (GET_CODE (XEXP (op, 0)) == REG
40161 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
40162 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40163 && XVECLEN (parallel, 0) == 1
40164 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
40166 *special = SH_EXTRACT;
40167 return 1;
40169 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
40170 XXPERMDI is a swap operation, it will be identified by
40171 insn_is_swap_p and therefore we won't get here. */
40172 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
40173 && (GET_MODE (XEXP (op, 0)) == V4DFmode
40174 || GET_MODE (XEXP (op, 0)) == V4DImode)
40175 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
40176 && XVECLEN (parallel, 0) == 2
40177 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
40178 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
40180 *special = SH_XXPERMDI;
40181 return 1;
40183 else if (v2df_reduction_p (op))
40184 return 1;
40185 else
40186 return 0;
40188 case UNSPEC:
40190 /* Various operations are unsafe for this optimization, at least
40191 without significant additional work. Permutes are obviously
40192 problematic, as both the permute control vector and the ordering
40193 of the target values are invalidated by doubleword swapping.
40194 Vector pack and unpack modify the number of vector lanes.
40195 Merge-high/low will not operate correctly on swapped operands.
40196 Vector shifts across element boundaries are clearly uncool,
40197 as are vector select and concatenate operations. Vector
40198 sum-across instructions define one operand with a specific
40199 order-dependent element, so additional fixup code would be
40200 needed to make those work. Vector set and non-immediate-form
40201 vector splat are element-order sensitive. A few of these
40202 cases might be workable with special handling if required.
40203 Adding cost modeling would be appropriate in some cases. */
40204 int val = XINT (op, 1);
40205 switch (val)
40207 default:
40208 break;
40209 case UNSPEC_VMRGH_DIRECT:
40210 case UNSPEC_VMRGL_DIRECT:
40211 case UNSPEC_VPACK_SIGN_SIGN_SAT:
40212 case UNSPEC_VPACK_SIGN_UNS_SAT:
40213 case UNSPEC_VPACK_UNS_UNS_MOD:
40214 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
40215 case UNSPEC_VPACK_UNS_UNS_SAT:
40216 case UNSPEC_VPERM:
40217 case UNSPEC_VPERM_UNS:
40218 case UNSPEC_VPERMHI:
40219 case UNSPEC_VPERMSI:
40220 case UNSPEC_VPKPX:
40221 case UNSPEC_VSLDOI:
40222 case UNSPEC_VSLO:
40223 case UNSPEC_VSRO:
40224 case UNSPEC_VSUM2SWS:
40225 case UNSPEC_VSUM4S:
40226 case UNSPEC_VSUM4UBS:
40227 case UNSPEC_VSUMSWS:
40228 case UNSPEC_VSUMSWS_DIRECT:
40229 case UNSPEC_VSX_CONCAT:
40230 case UNSPEC_VSX_SET:
40231 case UNSPEC_VSX_SLDWI:
40232 case UNSPEC_VUNPACK_HI_SIGN:
40233 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
40234 case UNSPEC_VUNPACK_LO_SIGN:
40235 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
40236 case UNSPEC_VUPKHPX:
40237 case UNSPEC_VUPKHS_V4SF:
40238 case UNSPEC_VUPKHU_V4SF:
40239 case UNSPEC_VUPKLPX:
40240 case UNSPEC_VUPKLS_V4SF:
40241 case UNSPEC_VUPKLU_V4SF:
40242 case UNSPEC_VSX_CVDPSPN:
40243 case UNSPEC_VSX_CVSPDP:
40244 case UNSPEC_VSX_CVSPDPN:
40245 case UNSPEC_VSX_EXTRACT:
40246 case UNSPEC_VSX_VSLO:
40247 case UNSPEC_VSX_VEC_INIT:
40248 return 0;
40249 case UNSPEC_VSPLT_DIRECT:
40250 *special = SH_SPLAT;
40251 return 1;
40252 case UNSPEC_REDUC_PLUS:
40253 case UNSPEC_REDUC:
40254 return 1;
40258 default:
40259 break;
40262 const char *fmt = GET_RTX_FORMAT (code);
40263 int ok = 1;
40265 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40266 if (fmt[i] == 'e' || fmt[i] == 'u')
40268 unsigned int special_op = SH_NONE;
40269 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
40270 if (special_op == SH_NONE)
40271 continue;
40272 /* Ensure we never have two kinds of special handling
40273 for the same insn. */
40274 if (*special != SH_NONE && *special != special_op)
40275 return 0;
40276 *special = special_op;
40278 else if (fmt[i] == 'E')
40279 for (j = 0; j < XVECLEN (op, i); ++j)
40281 unsigned int special_op = SH_NONE;
40282 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
40283 if (special_op == SH_NONE)
40284 continue;
40285 /* Ensure we never have two kinds of special handling
40286 for the same insn. */
40287 if (*special != SH_NONE && *special != special_op)
40288 return 0;
40289 *special = special_op;
40292 return ok;
40295 /* Return 1 iff INSN is an operand that will not be affected by
40296 having vector doublewords swapped in memory (in which case
40297 *SPECIAL is unchanged), or that can be modified to be correct
40298 if vector doublewords are swapped in memory (in which case
40299 *SPECIAL is changed to a value indicating how). */
40300 static unsigned int
40301 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
40302 unsigned int *special)
40304 /* Calls are always bad. */
40305 if (GET_CODE (insn) == CALL_INSN)
40306 return 0;
40308 /* Loads and stores seen here are not permuting, but we can still
40309 fix them up by converting them to permuting ones. Exceptions:
40310 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
40311 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
40312 for the SET source. Also we must now make an exception for lvx
40313 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
40314 explicit "& -16") since this leads to unrecognizable insns. */
40315 rtx body = PATTERN (insn);
40316 int i = INSN_UID (insn);
40318 if (insn_entry[i].is_load)
40320 if (GET_CODE (body) == SET)
40322 rtx rhs = SET_SRC (body);
40323 gcc_assert (GET_CODE (rhs) == MEM);
40324 if (GET_CODE (XEXP (rhs, 0)) == AND)
40325 return 0;
40327 *special = SH_NOSWAP_LD;
40328 return 1;
40330 else
40331 return 0;
40334 if (insn_entry[i].is_store)
40336 if (GET_CODE (body) == SET
40337 && GET_CODE (SET_SRC (body)) != UNSPEC)
40339 rtx lhs = SET_DEST (body);
40340 gcc_assert (GET_CODE (lhs) == MEM);
40341 if (GET_CODE (XEXP (lhs, 0)) == AND)
40342 return 0;
40344 *special = SH_NOSWAP_ST;
40345 return 1;
40347 else
40348 return 0;
40351 /* A convert to single precision can be left as is provided that
40352 all of its uses are in xxspltw instructions that splat BE element
40353 zero. */
40354 if (GET_CODE (body) == SET
40355 && GET_CODE (SET_SRC (body)) == UNSPEC
40356 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
40358 df_ref def;
40359 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40361 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40363 struct df_link *link = DF_REF_CHAIN (def);
40364 if (!link)
40365 return 0;
40367 for (; link; link = link->next) {
40368 rtx use_insn = DF_REF_INSN (link->ref);
40369 rtx use_body = PATTERN (use_insn);
40370 if (GET_CODE (use_body) != SET
40371 || GET_CODE (SET_SRC (use_body)) != UNSPEC
40372 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
40373 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
40374 return 0;
40378 return 1;
40381 /* A concatenation of two doublewords is ok if we reverse the
40382 order of the inputs. */
40383 if (GET_CODE (body) == SET
40384 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
40385 && (GET_MODE (SET_SRC (body)) == V2DFmode
40386 || GET_MODE (SET_SRC (body)) == V2DImode))
40388 *special = SH_CONCAT;
40389 return 1;
40392 /* V2DF reductions are always swappable. */
40393 if (GET_CODE (body) == PARALLEL)
40395 rtx expr = XVECEXP (body, 0, 0);
40396 if (GET_CODE (expr) == SET
40397 && v2df_reduction_p (SET_SRC (expr)))
40398 return 1;
40401 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
40402 constant pool. */
40403 if (GET_CODE (body) == SET
40404 && GET_CODE (SET_SRC (body)) == UNSPEC
40405 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
40406 && XVECLEN (SET_SRC (body), 0) == 3
40407 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
40409 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
40410 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40411 df_ref use;
40412 FOR_EACH_INSN_INFO_USE (use, insn_info)
40413 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40415 struct df_link *def_link = DF_REF_CHAIN (use);
40416 /* Punt if multiple definitions for this reg. */
40417 if (def_link && !def_link->next &&
40418 const_load_sequence_p (insn_entry,
40419 DF_REF_INSN (def_link->ref)))
40421 *special = SH_VPERM;
40422 return 1;
40427 /* Otherwise check the operands for vector lane violations. */
40428 return rtx_is_swappable_p (body, special);
40431 enum chain_purpose { FOR_LOADS, FOR_STORES };
40433 /* Return true if the UD or DU chain headed by LINK is non-empty,
40434 and every entry on the chain references an insn that is a
40435 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
40436 register swap must have only permuting loads as reaching defs.
40437 If PURPOSE is FOR_STORES, each such register swap must have only
40438 register swaps or permuting stores as reached uses. */
40439 static bool
40440 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
40441 enum chain_purpose purpose)
40443 if (!link)
40444 return false;
40446 for (; link; link = link->next)
40448 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
40449 continue;
40451 if (DF_REF_IS_ARTIFICIAL (link->ref))
40452 return false;
40454 rtx reached_insn = DF_REF_INSN (link->ref);
40455 unsigned uid = INSN_UID (reached_insn);
40456 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
40458 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
40459 || insn_entry[uid].is_store)
40460 return false;
40462 if (purpose == FOR_LOADS)
40464 df_ref use;
40465 FOR_EACH_INSN_INFO_USE (use, insn_info)
40467 struct df_link *swap_link = DF_REF_CHAIN (use);
40469 while (swap_link)
40471 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40472 return false;
40474 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
40475 unsigned uid2 = INSN_UID (swap_def_insn);
40477 /* Only permuting loads are allowed. */
40478 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
40479 return false;
40481 swap_link = swap_link->next;
40485 else if (purpose == FOR_STORES)
40487 df_ref def;
40488 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40490 struct df_link *swap_link = DF_REF_CHAIN (def);
40492 while (swap_link)
40494 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
40495 return false;
40497 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
40498 unsigned uid2 = INSN_UID (swap_use_insn);
40500 /* Permuting stores or register swaps are allowed. */
40501 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
40502 return false;
40504 swap_link = swap_link->next;
40510 return true;
/* Mark the xxswapdi instructions associated with permuting loads and
   stores for removal.  Note that we only flag them for deletion here,
   as there is a possibility of a swap being reached from multiple
   loads, etc.  */
static void
mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
{
  rtx insn = insn_entry[i].insn;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (insn_entry[i].is_load)
    {
      df_ref def;
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx use_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
  else if (insn_entry[i].is_store)
    {
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	{
	  /* Ignore uses for addressability.  */
	  machine_mode mode = GET_MODE (DF_REF_REG (use));
	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (use);

	  /* We know by now that these are swaps, so we can delete
	     them confidently.  */
	  while (link)
	    {
	      rtx def_insn = DF_REF_INSN (link->ref);
	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
	      link = link->next;
	    }
	}
    }
}
/* OP is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx op)
{
  int i;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
      for (i = 0; i < half_units; ++i)
	{
	  rtx temp = CONST_VECTOR_ELT (op, i);
	  CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
	  CONST_VECTOR_ELT (op, i + half_units) = temp;
	}
    }
  else
    {
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
	if (fmt[i] == 'e' || fmt[i] == 'u')
	  swap_const_vector_halves (XEXP (op, i));
	else if (fmt[i] == 'E')
	  for (j = 0; j < XVECLEN (op, i); ++j)
	    swap_const_vector_halves (XVECEXP (op, i, j));
    }
}
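/* For example, the V4SI constant { 0, 1, 2, 3 } becomes { 2, 3, 0, 1 }:
   with the doublewords of every register in the web kept in swapped
   order, the constant must be stored with its halves interchanged so
   that it reads correctly once the surrounding swaps are removed.  */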
/* Find all subregs of a vector expression that perform a narrowing,
   and adjust the subreg index to account for doubleword swapping.  */
static void
adjust_subreg_index (rtx op)
{
  enum rtx_code code = GET_CODE (op);
  if (code == SUBREG
      && (GET_MODE_SIZE (GET_MODE (op))
	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
    {
      unsigned int index = SUBREG_BYTE (op);
      if (index < 8)
	index += 8;
      else
	index -= 8;
      SUBREG_BYTE (op) = index;
    }

  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;
  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      adjust_subreg_index (XEXP (op, i));
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	adjust_subreg_index (XVECEXP (op, i, j));
}
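/* For example, (subreg:DI (reg:V2DI v) 0) names the doubleword at
   byte 0 of the canonical value; in the doubleword-swapped register
   that datum lives at byte 8, so indices 0 and 8 trade places.  */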
/* Convert the non-permuting load INSN to a permuting one.  */
static void
permute_load (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx mem_op = SET_SRC (body);
  rtx tgt_reg = SET_DEST (body);
  machine_mode mode = GET_MODE (tgt_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing load %d with permuted load\n",
	     INSN_UID (insn));
}
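/* As an illustration, for a V4SI load this turns

     (set (reg:V4SI R) (mem:V4SI M))

   into

     (set (reg:V4SI R)
	  (vec_select:V4SI (mem:V4SI M) (parallel [2 3 0 1])))

   which is the doubleword-swapping form that lxvd2x performs
   natively on little-endian targets.  */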
/* Convert the non-permuting store INSN to a permuting one.  */
static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
	     INSN_UID (insn));
}
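/* This is the mirror image of permute_load: the source register is
   wrapped in the same doubleword-swapping vec_select, mimicking what
   stxvd2x does to a value on its way to memory.  */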
/* Given INSN that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}
/* Given INSN that contains a vector direct-splat operation, adjust the
   index of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
/* Given INSN that contains an XXPERMDI operation (that is not a doubleword
   swap), reverse the order of the source operands and adjust the indices
   of the source lanes to account for doubleword reversal.  */
static void
adjust_xxpermdi (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx select = XEXP (set, 1);
  rtx concat = XEXP (select, 0);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  rtx parallel = XEXP (select, 1);
  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
  int new_lane0 = 3 - lane1;
  int new_lane1 = 3 - lane0;
  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
}
/* Given INSN that contains a VEC_CONCAT operation of two doublewords,
   reverse the order of those inputs.  */
static void
adjust_concat (rtx_insn *insn)
{
  rtx set = PATTERN (insn);
  rtx concat = XEXP (set, 1);
  rtx src0 = XEXP (concat, 0);
  XEXP (concat, 0) = XEXP (concat, 1);
  XEXP (concat, 1) = src0;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
}
/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
	struct df_link *def_link = DF_REF_CHAIN (use);
	gcc_assert (def_link && !def_link->next);
	swap_insn = DF_REF_INSN (def_link->ref);
	break;
      }
  gcc_assert (swap_insn);

  /* Find the load.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i)
    {
      val = INTVAL (XVECEXP (const_vector, 0, i));
      if (val < 16)
	new_mask[i] = (val + 8) % 16;
      else
	new_mask[i] = ((val + 8) % 16) + 16;
    }
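  /* The vperm control vector selects bytes of the two source vectors:
     element values 0-15 pick a byte of the first source, 16-31 a byte
     of the second.  Swapping the doublewords of a source moves its
     byte B to position (B + 8) % 16, so e.g. a mask element of 3
     becomes 11, and 19 becomes 27 (= ((19 + 8) % 16) + 16).  */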
  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0)))
    {
      rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
      XEXP (new_mem, 0) = base_reg;
      /* Move the newly created insn ahead of the load insn.  */
      rtx_insn *force_insn = get_last_insn ();
      remove_insn (force_insn);
      rtx_insn *before_load_insn = PREV_INSN (load_insn);
      add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
      df_insn_rescan (before_load_insn);
      df_insn_rescan (force_insn);
    }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}
/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
	gcc_assert (GET_CODE (body) == SET);
	rtx rhs = SET_SRC (body);
	swap_const_vector_halves (rhs);
	if (dump_file)
	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
	break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
	 select a smaller portion of a reg, adjust the index for
	 swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    case SH_XXPERMDI:
      /* Change the lanes on an XXPERMDI operation.  */
      adjust_xxpermdi (insn);
      break;
    case SH_CONCAT:
      /* Reverse the order of a concatenation operation.  */
      adjust_concat (insn);
      break;
    case SH_VPERM:
      /* Change the mask loaded from the constant pool for a VPERM.  */
      adjust_vperm (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
	fprintf (dump_file, "%6d %6d ", i,
		 pred_entry && pred_entry->insn
		 ? INSN_UID (pred_entry->insn) : 0);
	if (insn_entry[i].is_load)
	  fputs ("load ", dump_file);
	if (insn_entry[i].is_store)
	  fputs ("store ", dump_file);
	if (insn_entry[i].is_swap)
	  fputs ("swap ", dump_file);
	if (insn_entry[i].is_live_in)
	  fputs ("live-in ", dump_file);
	if (insn_entry[i].is_live_out)
	  fputs ("live-out ", dump_file);
	if (insn_entry[i].contains_subreg)
	  fputs ("subreg ", dump_file);
	if (insn_entry[i].is_128_int)
	  fputs ("int128 ", dump_file);
	if (insn_entry[i].is_call)
	  fputs ("call ", dump_file);
	if (insn_entry[i].is_swappable)
	  {
	    fputs ("swappable ", dump_file);
	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
	      fputs ("special:constvec ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SUBREG)
	      fputs ("special:subreg ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
	      fputs ("special:load ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
	      fputs ("special:store ", dump_file);
	    else if (insn_entry[i].special_handling == SH_EXTRACT)
	      fputs ("special:extract ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SPLAT)
	      fputs ("special:splat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
	      fputs ("special:xxpermdi ", dump_file);
	    else if (insn_entry[i].special_handling == SH_CONCAT)
	      fputs ("special:concat ", dump_file);
	    else if (insn_entry[i].special_handling == SH_VPERM)
	      fputs ("special:vperm ", dump_file);
	  }
	if (insn_entry[i].web_not_optimizable)
	  fputs ("unoptimizable ", dump_file);
	if (insn_entry[i].will_delete)
	  fputs ("delete ", dump_file);
	fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}
/* Return RTX with its address canonicalized to (reg) or (+ reg reg).
   Here RTX is an (& addr (const_int -16)).  Always return a new copy
   to avoid problems with combine.  */
static rtx
alignment_with_canonical_addr (rtx align)
{
  rtx canon;
  rtx addr = XEXP (align, 0);

  if (REG_P (addr))
    canon = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx addrop0 = XEXP (addr, 0);
      rtx addrop1 = XEXP (addr, 1);

      if (!REG_P (addrop0))
	addrop0 = force_reg (GET_MODE (addrop0), addrop0);

      if (!REG_P (addrop1))
	addrop1 = force_reg (GET_MODE (addrop1), addrop1);

      canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
    }

  else
    canon = force_reg (GET_MODE (addr), addr);

  return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
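/* For example, an alignment computed as
     (and:DI (plus:DI (reg:DI r) (const_int 16)) (const_int -16))
   comes back with the constant offset forced into a fresh pseudo
   (shown schematically as tmp):
     (and:DI (plus:DI (reg:DI r) (reg:DI tmp)) (const_int -16))
   so that the address fits the (reg) or (+ reg reg) forms used by
   lvx/stvx addressing.  */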
/* Check whether an rtx is an alignment mask, and if so, return
   a fully-expanded rtx for the masking operation.  */
static rtx
alignment_mask (rtx_insn *insn)
{
  rtx body = PATTERN (insn);

  if (GET_CODE (body) != SET
      || GET_CODE (SET_SRC (body)) != AND
      || !REG_P (XEXP (SET_SRC (body), 0)))
    return 0;

  rtx mask = XEXP (SET_SRC (body), 1);

  if (GET_CODE (mask) == CONST_INT)
    {
      if (INTVAL (mask) == -16)
	return alignment_with_canonical_addr (SET_SRC (body));
      else
	return 0;
    }

  if (!REG_P (mask))
    return 0;

  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx real_mask = 0;

  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (use), mask))
	continue;

      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return 0;

      rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
      rtx const_body = PATTERN (const_insn);
      if (GET_CODE (const_body) != SET)
	return 0;

      real_mask = SET_SRC (const_body);

      if (GET_CODE (real_mask) != CONST_INT
	  || INTVAL (real_mask) != -16)
	return 0;
    }

  if (real_mask == 0)
    return 0;

  return alignment_with_canonical_addr (SET_SRC (body));
}
/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.  */
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
	break;

      rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (and_insn);
      if (and_operation != 0)
	break;
    }

  return and_operation;
}
struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
	      && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  XEXP (mem, 0) = and_operation;
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
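/* Schematically, for an aligned address R computed as R' & -16, the pair

     (set (reg:V16QI T) (vec_select:V16QI (mem [R]) <swap-selector>))
     (set (reg:V16QI V) (vec_select:V16QI (reg:V16QI T) <swap-selector>))

   becomes

     (set (reg:V16QI T) (mem [(and R' (const_int -16))]))
     (set (reg:V16QI V) (reg:V16QI T))

   where the two swaps cancel and the AND in the address lets the
   insn be recognized as lvx, whose hardware address truncation
   performs the 16-byte alignment for free.  */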
/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_DEST (body)) == MEM
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  XEXP (mem, 0) = and_operation;
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}
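/* The store-side mirror of recombine_lvx_pattern: the feeding swap
   becomes a copy, and the store is rewritten as a plain
   (set (mem [(and R' (const_int -16))]) (reg)), matching stvx.  */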
/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
	if (!NONDEBUG_INSN_P (insn))
	  continue;

	if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	  recombine_lvx_pattern (insn, to_delete);
	else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	  recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}
/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    insn_entry[uid].insn = insn;

	    if (GET_CODE (insn) == CALL_INSN)
	      insn_entry[uid].is_call = 1;

	    /* Walk the uses and defs to see if we mention vector regs.
	       Record any constraints on optimization of such mentions.  */
	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	    df_ref mention;
	    FOR_EACH_INSN_INFO_USE (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If a use gets its value from a call insn, it will be
		   a hard register and will look like (reg:V4SI 3 3).
		   The df analysis creates two mentions for GPR3 and GPR4,
		   both DImode.  We must recognize this and treat it as a
		   vector mention to ensure the call is unioned with this
		   use.  */
		if (mode == DImode && DF_REF_INSN_INFO (mention))
		  {
		    rtx feeder = DF_REF_INSN (mention);
		    /* FIXME: It is pretty hard to get from the df mention
		       to the mode of the use in the insn.  We arbitrarily
		       pick a vector mode here, even though the use might
		       be a real DImode.  We can be too conservative
		       (create a web larger than necessary) because of
		       this, so consider eventually fixing this.  */
		    if (GET_CODE (feeder) == CALL_INSN)
		      mode = V4SImode;
		  }

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    union_defs (insn_entry, insn, mention);
		  }
	      }
	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If we're loading up a hard vector register for a call,
		   it looks like (set (reg:V4SI 9 9) (...)).  The df
		   analysis creates two mentions for GPR9 and GPR10, both
		   DImode.  So relying on the mode from the mentions
		   isn't sufficient to ensure we union the call into the
		   web with the parameter setup code.  */
		if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
		    && ALTIVEC_OR_VSX_VECTOR_MODE
			 (GET_MODE (SET_DEST (PATTERN (insn)))))
		  mode = GET_MODE (SET_DEST (PATTERN (insn)));

		if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode
			|| FLOAT128_VECTOR_P (mode))
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		      insn_entry[uid].is_live_out = 1;
		    union_uses (insn_entry, insn, mention);
		  }
	      }

	    if (insn_entry[uid].is_relevant)
	      {
		/* Determine if this is a load or store.  */
		insn_entry[uid].is_load = insn_is_load_p (insn);
		insn_entry[uid].is_store = insn_is_store_p (insn);

		/* Determine if this is a doubleword swap.  If not,
		   determine whether it can legally be swapped.  */
		if (insn_is_swap_p (insn))
		  insn_entry[uid].is_swap = 1;
		else
		  {
		    unsigned int special = SH_NONE;
		    insn_entry[uid].is_swappable
		      = insn_is_swappable_p (insn_entry, insn, &special);
		    if (special != SH_NONE && insn_entry[uid].contains_subreg)
		      insn_entry[uid].is_swappable = 0;
		    else if (special != SH_NONE)
		      insn_entry[uid].special_handling = special;
		    else if (insn_entry[uid].contains_subreg)
		      insn_entry[uid].special_handling = SH_SUBREG;
		  }
	      }
	  }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
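      /* This transformation is only profitable for little-endian VSX
	 code generation, and only before Power9: ISA 3.0 provides
	 endian-correct vector loads and stores, so no doubleword
	 swaps are generated there.  */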
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

       *fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
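  /* The mffs result carries the FPSCR image in the low 32 bits of the
     double's bit pattern.  The low three bits are the RN (rounding
     mode) and NI (non-IEEE mode) controls, so this mask preserves the
     rounding state while zeroing the exception status and enable bits,
     as feholdexcept requires.  */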
  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything, including the exception bits.  Note that
     this also clears the rounding modes and the non-IEEE arithmetic
     flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear,
			    call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
				fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
  /* Generates the equivalent of feupdateenv (&fenv_var)

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				  (*(uint64_t*)fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"